embedaddon/libxml2/parserInternals.c - annotate

Return to parserInternals.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2
Annotation of embedaddon/libxml2/parserInternals.c, revision 1.1.1.3

1.1       misho       1: /*
                      2:  * parserInternals.c : Internal routines (and obsolete ones) needed for the
                      3:  *                     XML and HTML parsers.
                      4:  *
                      5:  * See Copyright for the status of this software.
                      6:  *
                      7:  * daniel@veillard.com
                      8:  */
                      9: 
                     10: #define IN_LIBXML
                     11: #include "libxml.h"
                     12: 
                     13: #if defined(WIN32) && !defined (__CYGWIN__)
                     14: #define XML_DIR_SEP '\\'
                     15: #else
                     16: #define XML_DIR_SEP '/'
                     17: #endif
                     18: 
                     19: #include <string.h>
                     20: #ifdef HAVE_CTYPE_H
                     21: #include <ctype.h>
                     22: #endif
                     23: #ifdef HAVE_STDLIB_H
                     24: #include <stdlib.h>
                     25: #endif
                     26: #ifdef HAVE_SYS_STAT_H
                     27: #include <sys/stat.h>
                     28: #endif
                     29: #ifdef HAVE_FCNTL_H
                     30: #include <fcntl.h>
                     31: #endif
                     32: #ifdef HAVE_UNISTD_H
                     33: #include <unistd.h>
                     34: #endif
                     35: #ifdef HAVE_ZLIB_H
                     36: #include <zlib.h>
                     37: #endif
                     38: 
                     39: #include <libxml/xmlmemory.h>
                     40: #include <libxml/tree.h>
                     41: #include <libxml/parser.h>
                     42: #include <libxml/parserInternals.h>
                     43: #include <libxml/valid.h>
                     44: #include <libxml/entities.h>
                     45: #include <libxml/xmlerror.h>
                     46: #include <libxml/encoding.h>
                     47: #include <libxml/valid.h>
                     48: #include <libxml/xmlIO.h>
                     49: #include <libxml/uri.h>
                     50: #include <libxml/dict.h>
                     51: #include <libxml/SAX.h>
                     52: #ifdef LIBXML_CATALOG_ENABLED
                     53: #include <libxml/catalog.h>
                     54: #endif
                     55: #include <libxml/globals.h>
                     56: #include <libxml/chvalid.h>
                     57: 
1.1.1.3 ! misho      58: #include "buf.h"
        !            59: #include "enc.h"
        !            60: 
1.1       misho      61: /*
                     62:  * Various global defaults for parsing
                     63:  */
                     64: 
                     65: /**
                     66:  * xmlCheckVersion:
                     67:  * @version: the include version number
                     68:  *
                     69:  * check the compiled lib version against the include one.
                     70:  * This can warn or immediately kill the application
                     71:  */
                     72: void
                     73: xmlCheckVersion(int version) {
                     74:     int myversion = (int) LIBXML_VERSION;
                     75: 
                     76:     xmlInitParser();
                     77: 
                     78:     if ((myversion / 10000) != (version / 10000)) {
1.1.1.3 ! misho      79:        xmlGenericError(xmlGenericErrorContext,
1.1       misho      80:                "Fatal: program compiled against libxml %d using libxml %d\n",
                     81:                (version / 10000), (myversion / 10000));
1.1.1.3 ! misho      82:        fprintf(stderr,
1.1       misho      83:                "Fatal: program compiled against libxml %d using libxml %d\n",
                     84:                (version / 10000), (myversion / 10000));
                     85:     }
                     86:     if ((myversion / 100) < (version / 100)) {
1.1.1.3 ! misho      87:        xmlGenericError(xmlGenericErrorContext,
1.1       misho      88:                "Warning: program compiled against libxml %d using older %d\n",
                     89:                (version / 100), (myversion / 100));
                     90:     }
                     91: }
                     92: 
                     93: 
                     94: /************************************************************************
                     95:  *                                                                     *
1.1.1.3 ! misho      96:  *             Some factorized error routines                          *
1.1       misho      97:  *                                                                     *
                     98:  ************************************************************************/
                     99: 
                    100: 
                    101: /**
                    102:  * xmlErrMemory:
                    103:  * @ctxt:  an XML parser context
                    104:  * @extra:  extra informations
                    105:  *
                    106:  * Handle a redefinition of attribute error
                    107:  */
                    108: void
                    109: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
                    110: {
                    111:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    112:         (ctxt->instate == XML_PARSER_EOF))
                    113:        return;
                    114:     if (ctxt != NULL) {
                    115:         ctxt->errNo = XML_ERR_NO_MEMORY;
                    116:         ctxt->instate = XML_PARSER_EOF;
                    117:         ctxt->disableSAX = 1;
                    118:     }
                    119:     if (extra)
                    120:         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                    121:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
                    122:                         NULL, NULL, 0, 0,
                    123:                         "Memory allocation failed : %s\n", extra);
                    124:     else
                    125:         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                    126:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
                    127:                         NULL, NULL, 0, 0, "Memory allocation failed\n");
                    128: }
                    129: 
                    130: /**
                    131:  * __xmlErrEncoding:
                    132:  * @ctxt:  an XML parser context
                    133:  * @xmlerr:  the error number
                    134:  * @msg:  the error message
                    135:  * @str1:  an string info
                    136:  * @str2:  an string info
                    137:  *
                    138:  * Handle an encoding error
                    139:  */
                    140: void
                    141: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
                    142:                  const char *msg, const xmlChar * str1, const xmlChar * str2)
                    143: {
                    144:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    145:         (ctxt->instate == XML_PARSER_EOF))
                    146:        return;
                    147:     if (ctxt != NULL)
                    148:         ctxt->errNo = xmlerr;
                    149:     __xmlRaiseError(NULL, NULL, NULL,
                    150:                     ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
                    151:                     NULL, 0, (const char *) str1, (const char *) str2,
                    152:                     NULL, 0, 0, msg, str1, str2);
                    153:     if (ctxt != NULL) {
                    154:         ctxt->wellFormed = 0;
                    155:         if (ctxt->recovery == 0)
                    156:             ctxt->disableSAX = 1;
                    157:     }
                    158: }
                    159: 
                    160: /**
                    161:  * xmlErrInternal:
                    162:  * @ctxt:  an XML parser context
                    163:  * @msg:  the error message
                    164:  * @str:  error informations
                    165:  *
                    166:  * Handle an internal error
                    167:  */
                    168: static void
                    169: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
                    170: {
                    171:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    172:         (ctxt->instate == XML_PARSER_EOF))
                    173:        return;
                    174:     if (ctxt != NULL)
                    175:         ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    176:     __xmlRaiseError(NULL, NULL, NULL,
                    177:                     ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
                    178:                     XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
                    179:                     0, 0, msg, str);
                    180:     if (ctxt != NULL) {
                    181:         ctxt->wellFormed = 0;
                    182:         if (ctxt->recovery == 0)
                    183:             ctxt->disableSAX = 1;
                    184:     }
                    185: }
                    186: 
                    187: /**
                    188:  * xmlErrEncodingInt:
                    189:  * @ctxt:  an XML parser context
                    190:  * @error:  the error number
                    191:  * @msg:  the error message
                    192:  * @val:  an integer value
                    193:  *
                    194:  * n encoding error
                    195:  */
                    196: static void
                    197: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                    198:                   const char *msg, int val)
                    199: {
                    200:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    201:         (ctxt->instate == XML_PARSER_EOF))
                    202:        return;
                    203:     if (ctxt != NULL)
                    204:         ctxt->errNo = error;
                    205:     __xmlRaiseError(NULL, NULL, NULL,
                    206:                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    207:                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
                    208:     if (ctxt != NULL) {
                    209:         ctxt->wellFormed = 0;
                    210:         if (ctxt->recovery == 0)
                    211:             ctxt->disableSAX = 1;
                    212:     }
                    213: }
                    214: 
                    215: /**
                    216:  * xmlIsLetter:
                    217:  * @c:  an unicode character (int)
                    218:  *
                    219:  * Check whether the character is allowed by the production
                    220:  * [84] Letter ::= BaseChar | Ideographic
                    221:  *
                    222:  * Returns 0 if not, non-zero otherwise
                    223:  */
                    224: int
                    225: xmlIsLetter(int c) {
                    226:     return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
                    227: }
                    228: 
                    229: /************************************************************************
                    230:  *                                                                     *
1.1.1.3 ! misho     231:  *             Input handling functions for progressive parsing        *
1.1       misho     232:  *                                                                     *
                    233:  ************************************************************************/
                    234: 
                    235: /* #define DEBUG_INPUT */
                    236: /* #define DEBUG_STACK */
                    237: /* #define DEBUG_PUSH */
                    238: 
                    239: 
                    240: /* we need to keep enough input to show errors in context */
                    241: #define LINE_LEN        80
                    242: 
                    243: #ifdef DEBUG_INPUT
                    244: #define CHECK_BUFFER(in) check_buffer(in)
                    245: 
                    246: static
                    247: void check_buffer(xmlParserInputPtr in) {
1.1.1.3 ! misho     248:     if (in->base != xmlBufContent(in->buf->buffer)) {
1.1       misho     249:         xmlGenericError(xmlGenericErrorContext,
                    250:                "xmlParserInput: base mismatch problem\n");
                    251:     }
                    252:     if (in->cur < in->base) {
                    253:         xmlGenericError(xmlGenericErrorContext,
                    254:                "xmlParserInput: cur < base problem\n");
                    255:     }
1.1.1.3 ! misho     256:     if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
1.1       misho     257:         xmlGenericError(xmlGenericErrorContext,
                    258:                "xmlParserInput: cur > base + use problem\n");
                    259:     }
1.1.1.3 ! misho     260:     xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
        !           261:             (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
        !           262:            xmlBufUse(in->buf->buffer));
1.1       misho     263: }
                    264: 
                    265: #else
1.1.1.3 ! misho     266: #define CHECK_BUFFER(in)
1.1       misho     267: #endif
                    268: 
                    269: 
                    270: /**
                    271:  * xmlParserInputRead:
                    272:  * @in:  an XML parser input
                    273:  * @len:  an indicative size for the lookahead
                    274:  *
1.1.1.3 ! misho     275:  * This function was internal and is deprecated.
1.1       misho     276:  *
1.1.1.3 ! misho     277:  * Returns -1 as this is an error to use it.
1.1       misho     278:  */
                    279: int
1.1.1.3 ! misho     280: xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
        !           281:     return(-1);
1.1       misho     282: }
                    283: 
                    284: /**
                    285:  * xmlParserInputGrow:
                    286:  * @in:  an XML parser input
                    287:  * @len:  an indicative size for the lookahead
                    288:  *
                    289:  * This function increase the input for the parser. It tries to
                    290:  * preserve pointers to the input buffer, and keep already read data
                    291:  *
1.1.1.3 ! misho     292:  * Returns the amount of char read, or -1 in case of error, 0 indicate the
1.1       misho     293:  * end of this entity
                    294:  */
                    295: int
                    296: xmlParserInputGrow(xmlParserInputPtr in, int len) {
1.1.1.3 ! misho     297:     size_t ret;
        !           298:     size_t indx;
        !           299:     const xmlChar *content;
1.1       misho     300: 
1.1.1.3 ! misho     301:     if ((in == NULL) || (len < 0)) return(-1);
1.1       misho     302: #ifdef DEBUG_INPUT
                    303:     xmlGenericError(xmlGenericErrorContext, "Grow\n");
                    304: #endif
                    305:     if (in->buf == NULL) return(-1);
                    306:     if (in->base == NULL) return(-1);
                    307:     if (in->cur == NULL) return(-1);
                    308:     if (in->buf->buffer == NULL) return(-1);
                    309: 
                    310:     CHECK_BUFFER(in);
                    311: 
                    312:     indx = in->cur - in->base;
1.1.1.3 ! misho     313:     if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
1.1       misho     314: 
                    315:        CHECK_BUFFER(in);
                    316: 
                    317:         return(0);
                    318:     }
1.1.1.3 ! misho     319:     if (in->buf->readcallback != NULL) {
1.1       misho     320:        ret = xmlParserInputBufferGrow(in->buf, len);
1.1.1.3 ! misho     321:     } else
1.1       misho     322:         return(0);
                    323: 
                    324:     /*
                    325:      * NOTE : in->base may be a "dangling" i.e. freed pointer in this
                    326:      *        block, but we use it really as an integer to do some
                    327:      *        pointer arithmetic. Insure will raise it as a bug but in
                    328:      *        that specific case, that's not !
                    329:      */
1.1.1.3 ! misho     330: 
        !           331:     content = xmlBufContent(in->buf->buffer);
        !           332:     if (in->base != content) {
1.1       misho     333:         /*
                    334:         * the buffer has been reallocated
                    335:         */
                    336:        indx = in->cur - in->base;
1.1.1.3 ! misho     337:        in->base = content;
        !           338:        in->cur = &content[indx];
1.1       misho     339:     }
1.1.1.3 ! misho     340:     in->end = xmlBufEnd(in->buf->buffer);
1.1       misho     341: 
                    342:     CHECK_BUFFER(in);
                    343: 
                    344:     return(ret);
                    345: }
                    346: 
                    347: /**
                    348:  * xmlParserInputShrink:
                    349:  * @in:  an XML parser input
                    350:  *
                    351:  * This function removes used input for the parser.
                    352:  */
                    353: void
                    354: xmlParserInputShrink(xmlParserInputPtr in) {
1.1.1.3 ! misho     355:     size_t used;
        !           356:     size_t ret;
        !           357:     size_t indx;
        !           358:     const xmlChar *content;
1.1       misho     359: 
                    360: #ifdef DEBUG_INPUT
                    361:     xmlGenericError(xmlGenericErrorContext, "Shrink\n");
                    362: #endif
                    363:     if (in == NULL) return;
                    364:     if (in->buf == NULL) return;
                    365:     if (in->base == NULL) return;
                    366:     if (in->cur == NULL) return;
                    367:     if (in->buf->buffer == NULL) return;
                    368: 
                    369:     CHECK_BUFFER(in);
                    370: 
1.1.1.3 ! misho     371:     used = in->cur - xmlBufContent(in->buf->buffer);
1.1       misho     372:     /*
                    373:      * Do not shrink on large buffers whose only a tiny fraction
                    374:      * was consumed
                    375:      */
                    376:     if (used > INPUT_CHUNK) {
1.1.1.3 ! misho     377:        ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
1.1       misho     378:        if (ret > 0) {
                    379:            in->cur -= ret;
                    380:            in->consumed += ret;
                    381:        }
1.1.1.3 ! misho     382:        in->end = xmlBufEnd(in->buf->buffer);
1.1       misho     383:     }
                    384: 
                    385:     CHECK_BUFFER(in);
                    386: 
1.1.1.3 ! misho     387:     if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
1.1       misho     388:         return;
                    389:     }
                    390:     xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1.1.1.3 ! misho     391:     content = xmlBufContent(in->buf->buffer);
        !           392:     if (in->base != content) {
1.1       misho     393:         /*
                    394:         * the buffer has been reallocated
                    395:         */
                    396:        indx = in->cur - in->base;
1.1.1.3 ! misho     397:        in->base = content;
        !           398:        in->cur = &content[indx];
1.1       misho     399:     }
1.1.1.3 ! misho     400:     in->end = xmlBufEnd(in->buf->buffer);
1.1       misho     401: 
                    402:     CHECK_BUFFER(in);
                    403: }
                    404: 
                    405: /************************************************************************
                    406:  *                                                                     *
1.1.1.3 ! misho     407:  *             UTF8 character input and related functions              *
1.1       misho     408:  *                                                                     *
                    409:  ************************************************************************/
                    410: 
                    411: /**
                    412:  * xmlNextChar:
                    413:  * @ctxt:  the XML parser context
                    414:  *
                    415:  * Skip the current character of the current input and move to the next one.
                          *
                          * Does nothing when @ctxt is NULL, has no input, or is in state
                          * XML_PARSER_EOF.
                          *
                          * When ctxt->charset is XML_CHAR_ENCODING_UTF8 the length of the
                          * sequence (1 to 4 bytes) is derived from the lead byte, the
                          * continuation bytes are checked, and input->cur advances by that
                          * length. Any other charset is treated as one byte per character.
                          * input->line / input->col and ctxt->nbChars are updated on the way.
                          *
                          * On a malformed UTF-8 sequence an XML_ERR_INVALID_CHAR encoding error
                          * is raised, ctxt->charset is switched to XML_CHAR_ENCODING_8859_1 and
                          * a single byte is skipped.
                          *
                          * After a successful step, a '%' at the new position (non-HTML
                          * contexts only) triggers xmlParserHandlePEReference(), and a 0 byte
                          * that cannot be grown past triggers xmlPopInput().
                    416:  */
                    417: 
                    418: void
                    419: xmlNextChar(xmlParserCtxtPtr ctxt)
                    420: {
                    421:     if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
                    422:         (ctxt->input == NULL))
                    423:         return;
                    424: 
                    425:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                                 /* At a 0 byte, with the grow call returning <= 0 and the parser
                                  * not in XML_PARSER_COMMENT state: pop the input instead of
                                  * stepping over the byte. */
                    426:         if ((*ctxt->input->cur == 0) &&
                    427:             (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
                    428:             (ctxt->instate != XML_PARSER_COMMENT)) {
                    429:             /*
                    430:              * If we are at the end of the current entity and
                    431:              * the context allows it, we pop consumed entities
                    432:              * automatically.
                    433:              * the auto closing should be blocked in other cases
                    434:              */
                    435:             xmlPopInput(ctxt);
                    436:         } else {
                    437:             const unsigned char *cur;
                    438:             unsigned char c;
                    439: 
                    440:             /*
                    441:              *   2.11 End-of-Line Handling
                    442:              *   the literal two-character sequence "#xD#xA" or a standalone
                    443:              *   literal #xD, an XML processor must pass to the application
                    444:              *   the single character #xA.
                    445:              */
                                     /* Position accounting happens before the byte is validated,
                                      * so it is also applied when the encoding_error path is taken. */
                    446:             if (*(ctxt->input->cur) == '\n') {
                    447:                 ctxt->input->line++; ctxt->input->col = 1;
                    448:             } else
                    449:                 ctxt->input->col++;
                    450: 
                    451:             /*
                    452:              * We are supposed to handle UTF8, check it's valid
                    453:              * From rfc2044: encoding of the Unicode values on UTF-8:
                    454:              *
                    455:              * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    456:              * 0000 0000-0000 007F   0xxxxxxx
                    457:              * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1.1.1.3 ! misho     458:              * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1.1       misho     459:              *
                    460:              * Check for the 0x110000 limit too
                    461:              */
                    462:             cur = ctxt->input->cur;
                    463: 
                    464:             c = *cur;
                                     /* High bit set: lead byte of a multi-byte sequence. */
                    465:             if (c & 0x80) {
                                         /* The lead byte 0xC0 is rejected outright. */
                    466:                if (c == 0xC0)
                    467:                    goto encoding_error;
                                         /* A 0 byte where a continuation byte is expected: ask for
                                          * more input, then reload cur since xmlParserInputGrow()
                                          * may have moved input->cur. */
                    468:                 if (cur[1] == 0) {
                    469:                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    470:                     cur = ctxt->input->cur;
                    471:                 }
                    472:                 if ((cur[1] & 0xc0) != 0x80)
                    473:                     goto encoding_error;
                                         /* Lead byte 111xxxxx: sequence of three bytes or more. */
                    474:                 if ((c & 0xe0) == 0xe0) {
                    475:                     unsigned int val;
                    476: 
                    477:                     if (cur[2] == 0) {
                    478:                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    479:                         cur = ctxt->input->cur;
                    480:                     }
                    481:                     if ((cur[2] & 0xc0) != 0x80)
                    482:                         goto encoding_error;
                                             /* Lead byte 1111xxxx: four bytes; a lead byte of the
                                              * form 11111xxx fails the (c & 0xf8) test below. */
                    483:                     if ((c & 0xf0) == 0xf0) {
                    484:                         if (cur[3] == 0) {
                    485:                             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    486:                             cur = ctxt->input->cur;
                    487:                         }
                    488:                         if (((c & 0xf8) != 0xf0) ||
                    489:                             ((cur[3] & 0xc0) != 0x80))
                    490:                             goto encoding_error;
                    491:                         /* 4-byte code */
                    492:                         ctxt->input->cur += 4;
                    493:                         val = (cur[0] & 0x7) << 18;
                    494:                         val |= (cur[1] & 0x3f) << 12;
                    495:                         val |= (cur[2] & 0x3f) << 6;
                    496:                         val |= cur[3] & 0x3f;
                    497:                     } else {
                    498:                         /* 3-byte code */
                    499:                         ctxt->input->cur += 3;
                    500:                         val = (cur[0] & 0xf) << 12;
                    501:                         val |= (cur[1] & 0x3f) << 6;
                    502:                         val |= cur[2] & 0x3f;
                    503:                     }
                                             /* input->cur was already advanced above: a code point in
                                              * 0xD800-0xDFFF, 0xFFFE-0xFFFF or >= 0x110000 is reported
                                              * but still skipped (no jump to encoding_error). */
                    504:                     if (((val > 0xd7ff) && (val < 0xe000)) ||
                    505:                         ((val > 0xfffd) && (val < 0x10000)) ||
                    506:                         (val >= 0x110000)) {
                    507:                        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    508:                                          "Char 0x%X out of allowed range\n",
                    509:                                          val);
                    510:                     }
                    511:                 } else
                    512:                     /* 2-byte code */
                    513:                     ctxt->input->cur += 2;
                    514:             } else
                    515:                 /* 1-byte code */
                    516:                 ctxt->input->cur++;
                    517: 
                    518:             ctxt->nbChars++;
                    519:             if (*ctxt->input->cur == 0)
                    520:                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    521:         }
                    522:     } else {
                    523:         /*
                    524:          * Assume it's a fixed length encoding (1) with
                    525:          * a compatible encoding for the ASCII set, since
                    526:          * XML constructs only use < 128 chars
                    527:          */
                    528: 
                    529:         if (*(ctxt->input->cur) == '\n') {
                    530:             ctxt->input->line++; ctxt->input->col = 1;
                    531:         } else
                    532:             ctxt->input->col++;
                    533:         ctxt->input->cur++;
                    534:         ctxt->nbChars++;
                    535:         if (*ctxt->input->cur == 0)
                    536:             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    537:     }
                             /* Common tail, reached from every path above except the jumps
                              * to encoding_error, which returns on its own. */
                    538:     if ((*ctxt->input->cur == '%') && (!ctxt->html))
                    539:         xmlParserHandlePEReference(ctxt);
                    540:     if ((*ctxt->input->cur == 0) &&
                    541:         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
                    542:         xmlPopInput(ctxt);
                    543:     return;
                    544: encoding_error:
                    545:     /*
                    546:      * If we detect an UTF8 error that probably mean that the
                    547:      * input encoding didn't get properly advertised in the
                    548:      * declaration header. Report the error and switch the encoding
                    549:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    550:      * encoding !)
                    551:      */
                             /* The four-byte dump is only added to the message when at least
                              * four bytes remain between input->cur and input->end. */
                    552:     if ((ctxt == NULL) || (ctxt->input == NULL) ||
                    553:         (ctxt->input->end - ctxt->input->cur < 4)) {
                    554:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    555:                     "Input is not proper UTF-8, indicate encoding !\n",
                    556:                     NULL, NULL);
                    557:     } else {
                    558:         char buffer[150];
                    559: 
                    560:        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    561:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    562:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    563:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    564:                     "Input is not proper UTF-8, indicate encoding !\n%s",
                    565:                     BAD_CAST buffer, NULL);
                    566:     }
                             /* Fall back to ISO-8859-1 for the following calls and skip only
                              * the offending byte; ctxt->nbChars is not incremented here. */
                    567:     ctxt->charset = XML_CHAR_ENCODING_8859_1;
                    568:     ctxt->input->cur++;
                    569:     return;
                    570: }
                    571: 
                    572: /**
                    573:  * xmlCurrentChar:
                    574:  * @ctxt:  the XML parser context
                    575:  * @len:  pointer to the length of the char read
                    576:  *
                    577:  * The current char value, if using UTF-8 this may actually span multiple
                    578:  * bytes in the input buffer. Implement the end of line normalization:
                    579:  * 2.11 End-of-Line Handling
                    580:  * Wherever an external parsed entity or the literal entity value
                    581:  * of an internal parsed entity contains either the literal two-character
                    582:  * sequence "#xD#xA" or a standalone literal #xD, an XML processor
                    583:  * must pass to the application the single character #xA.
                    584:  * This behavior can conveniently be produced by normalizing all
                    585:  * line breaks to #xA on input, before parsing.)
                    586:  *
                    587:  * Returns the current char value and its length
                    588:  */
                    589: 
                    590: int
                    591: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                    592:     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
                    593:     if (ctxt->instate == XML_PARSER_EOF)
                    594:        return(0);
                    595: 
                    596:     if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
                    597:            *len = 1;
                    598:            return((int) *ctxt->input->cur);
                    599:     }
                    600:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                    601:        /*
                    602:         * We are supposed to handle UTF8, check it's valid
                    603:         * From rfc2044: encoding of the Unicode values on UTF-8:
                    604:         *
                    605:         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    606:         * 0000 0000-0000 007F   0xxxxxxx
                    607:         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1.1.1.3 ! misho     608:         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1.1       misho     609:         *
                    610:         * Check for the 0x110000 limit too
                    611:         */
                    612:        const unsigned char *cur = ctxt->input->cur;
                    613:        unsigned char c;
                    614:        unsigned int val;
                    615: 
                    616:        c = *cur;
                    617:        if (c & 0x80) {
                    618:            if (((c & 0x40) == 0) || (c == 0xC0))
                    619:                goto encoding_error;
                    620:            if (cur[1] == 0) {
                    621:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    622:                 cur = ctxt->input->cur;
                    623:             }
                    624:            if ((cur[1] & 0xc0) != 0x80)
                    625:                goto encoding_error;
                    626:            if ((c & 0xe0) == 0xe0) {
                    627:                if (cur[2] == 0) {
                    628:                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    629:                     cur = ctxt->input->cur;
                    630:                 }
                    631:                if ((cur[2] & 0xc0) != 0x80)
                    632:                    goto encoding_error;
                    633:                if ((c & 0xf0) == 0xf0) {
                    634:                    if (cur[3] == 0) {
                    635:                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    636:                         cur = ctxt->input->cur;
                    637:                     }
                    638:                    if (((c & 0xf8) != 0xf0) ||
                    639:                        ((cur[3] & 0xc0) != 0x80))
                    640:                        goto encoding_error;
                    641:                    /* 4-byte code */
                    642:                    *len = 4;
                    643:                    val = (cur[0] & 0x7) << 18;
                    644:                    val |= (cur[1] & 0x3f) << 12;
                    645:                    val |= (cur[2] & 0x3f) << 6;
                    646:                    val |= cur[3] & 0x3f;
                    647:                    if (val < 0x10000)
                    648:                        goto encoding_error;
                    649:                } else {
                    650:                  /* 3-byte code */
                    651:                    *len = 3;
                    652:                    val = (cur[0] & 0xf) << 12;
                    653:                    val |= (cur[1] & 0x3f) << 6;
                    654:                    val |= cur[2] & 0x3f;
                    655:                    if (val < 0x800)
                    656:                        goto encoding_error;
                    657:                }
                    658:            } else {
                    659:              /* 2-byte code */
                    660:                *len = 2;
                    661:                val = (cur[0] & 0x1f) << 6;
                    662:                val |= cur[1] & 0x3f;
                    663:                if (val < 0x80)
                    664:                    goto encoding_error;
                    665:            }
                    666:            if (!IS_CHAR(val)) {
                    667:                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    668:                                  "Char 0x%X out of allowed range\n", val);
1.1.1.3 ! misho     669:            }
1.1       misho     670:            return(val);
                    671:        } else {
                    672:            /* 1-byte code */
                    673:            *len = 1;
                    674:            if (*ctxt->input->cur == 0)
                    675:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    676:            if ((*ctxt->input->cur == 0) &&
                    677:                (ctxt->input->end > ctxt->input->cur)) {
                    678:                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    679:                                  "Char 0x0 out of allowed range\n", 0);
                    680:            }
                    681:            if (*ctxt->input->cur == 0xD) {
                    682:                if (ctxt->input->cur[1] == 0xA) {
                    683:                    ctxt->nbChars++;
                    684:                    ctxt->input->cur++;
                    685:                }
                    686:                return(0xA);
                    687:            }
                    688:            return((int) *ctxt->input->cur);
                    689:        }
                    690:     }
                    691:     /*
                    692:      * Assume it's a fixed length encoding (1) with
                    693:      * a compatible encoding for the ASCII set, since
                    694:      * XML constructs only use < 128 chars
                    695:      */
                    696:     *len = 1;
                    697:     if (*ctxt->input->cur == 0xD) {
                    698:        if (ctxt->input->cur[1] == 0xA) {
                    699:            ctxt->nbChars++;
                    700:            ctxt->input->cur++;
                    701:        }
                    702:        return(0xA);
                    703:     }
                    704:     return((int) *ctxt->input->cur);
                    705: encoding_error:
                    706:     /*
                    707:      * An encoding problem may arise from a truncated input buffer
                    708:      * splitting a character in the middle. In that case do not raise
                    709:      * an error but return 0 to endicate an end of stream problem
                    710:      */
                    711:     if (ctxt->input->end - ctxt->input->cur < 4) {
                    712:        *len = 0;
                    713:        return(0);
                    714:     }
                    715: 
                    716:     /*
                    717:      * If we detect an UTF8 error that probably mean that the
                    718:      * input encoding didn't get properly advertised in the
                    719:      * declaration header. Report the error and switch the encoding
                    720:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    721:      * encoding !)
                    722:      */
                    723:     {
                    724:         char buffer[150];
                    725: 
                    726:        snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    727:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    728:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    729:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    730:                     "Input is not proper UTF-8, indicate encoding !\n%s",
                    731:                     BAD_CAST buffer, NULL);
                    732:     }
1.1.1.3 ! misho     733:     ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.1       misho     734:     *len = 1;
                    735:     return((int) *ctxt->input->cur);
                    736: }
                    737: 
                    738: /**
                    739:  * xmlStringCurrentChar:
                    740:  * @ctxt:  the XML parser context
                    741:  * @cur:  pointer to the beginning of the char
                    742:  * @len:  pointer to the length of the char read
                    743:  *
                    744:  * The current char value, if using UTF-8 this may actually span multiple
                    745:  * bytes in the input buffer.
                    746:  *
                    747:  * Returns the current char value and its length
                    748:  */
                    749: 
                    750: int
                    751: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
                    752: {
                    753:     if ((len == NULL) || (cur == NULL)) return(0);
                    754:     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
                    755:         /*
                    756:          * We are supposed to handle UTF8, check it's valid
                    757:          * From rfc2044: encoding of the Unicode values on UTF-8:
                    758:          *
                    759:          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    760:          * 0000 0000-0000 007F   0xxxxxxx
                    761:          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1.1.1.3 ! misho     762:          * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1.1       misho     763:          *
                    764:          * Check for the 0x110000 limit too
                    765:          */
                    766:         unsigned char c;
                    767:         unsigned int val;
                    768: 
                    769:         c = *cur;
                    770:         if (c & 0x80) {
                    771:             if ((cur[1] & 0xc0) != 0x80)
                    772:                 goto encoding_error;
                    773:             if ((c & 0xe0) == 0xe0) {
                    774: 
                    775:                 if ((cur[2] & 0xc0) != 0x80)
                    776:                     goto encoding_error;
                    777:                 if ((c & 0xf0) == 0xf0) {
                    778:                     if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
                    779:                         goto encoding_error;
                    780:                     /* 4-byte code */
                    781:                     *len = 4;
                    782:                     val = (cur[0] & 0x7) << 18;
                    783:                     val |= (cur[1] & 0x3f) << 12;
                    784:                     val |= (cur[2] & 0x3f) << 6;
                    785:                     val |= cur[3] & 0x3f;
                    786:                 } else {
                    787:                     /* 3-byte code */
                    788:                     *len = 3;
                    789:                     val = (cur[0] & 0xf) << 12;
                    790:                     val |= (cur[1] & 0x3f) << 6;
                    791:                     val |= cur[2] & 0x3f;
                    792:                 }
                    793:             } else {
                    794:                 /* 2-byte code */
                    795:                 *len = 2;
                    796:                 val = (cur[0] & 0x1f) << 6;
                    797:                 val |= cur[1] & 0x3f;
                    798:             }
                    799:             if (!IS_CHAR(val)) {
                    800:                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    801:                                  "Char 0x%X out of allowed range\n", val);
                    802:             }
                    803:             return (val);
                    804:         } else {
                    805:             /* 1-byte code */
                    806:             *len = 1;
                    807:             return ((int) *cur);
                    808:         }
                    809:     }
                    810:     /*
                    811:      * Assume it's a fixed length encoding (1) with
                    812:      * a compatible encoding for the ASCII set, since
                    813:      * XML constructs only use < 128 chars
                    814:      */
                    815:     *len = 1;
                    816:     return ((int) *cur);
                    817: encoding_error:
                    818: 
                    819:     /*
                    820:      * An encoding problem may arise from a truncated input buffer
                    821:      * splitting a character in the middle. In that case do not raise
                    822:      * an error but return 0 to endicate an end of stream problem
                    823:      */
                    824:     if ((ctxt == NULL) || (ctxt->input == NULL) ||
                    825:         (ctxt->input->end - ctxt->input->cur < 4)) {
                    826:        *len = 0;
                    827:        return(0);
                    828:     }
                    829:     /*
                    830:      * If we detect an UTF8 error that probably mean that the
                    831:      * input encoding didn't get properly advertised in the
                    832:      * declaration header. Report the error and switch the encoding
                    833:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    834:      * encoding !)
                    835:      */
                    836:     {
                    837:         char buffer[150];
                    838: 
                    839:        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    840:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    841:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    842:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    843:                     "Input is not proper UTF-8, indicate encoding !\n%s",
                    844:                     BAD_CAST buffer, NULL);
                    845:     }
                    846:     *len = 1;
                    847:     return ((int) *cur);
                    848: }
                    849: 
                    850: /**
                    851:  * xmlCopyCharMultiByte:
                    852:  * @out:  pointer to an array of xmlChar
                    853:  * @val:  the char value
                    854:  *
1.1.1.3 ! misho     855:  * append the char value in the array
1.1       misho     856:  *
                    857:  * Returns the number of xmlChar written
                    858:  */
                    859: int
                    860: xmlCopyCharMultiByte(xmlChar *out, int val) {
                    861:     if (out == NULL) return(0);
                    862:     /*
                    863:      * We are supposed to handle UTF8, check it's valid
                    864:      * From rfc2044: encoding of the Unicode values on UTF-8:
                    865:      *
                    866:      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    867:      * 0000 0000-0000 007F   0xxxxxxx
                    868:      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1.1.1.3 ! misho     869:      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1.1       misho     870:      */
                    871:     if  (val >= 0x80) {
                    872:        xmlChar *savedout = out;
                    873:        int bits;
                    874:        if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
                    875:        else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
                    876:        else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
                    877:        else {
                    878:            xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
                    879:                    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
                    880:                              val);
                    881:            return(0);
                    882:        }
                    883:        for ( ; bits >= 0; bits-= 6)
                    884:            *out++= ((val >> bits) & 0x3F) | 0x80 ;
                    885:        return (out - savedout);
                    886:     }
                    887:     *out = (xmlChar) val;
                    888:     return 1;
                    889: }
                    890: 
                    891: /**
                    892:  * xmlCopyChar:
                    893:  * @len:  Ignored, compatibility
                    894:  * @out:  pointer to an array of xmlChar
                    895:  * @val:  the char value
                    896:  *
1.1.1.3 ! misho     897:  * append the char value in the array
1.1       misho     898:  *
                    899:  * Returns the number of xmlChar written
                    900:  */
                    901: 
                    902: int
                    903: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
                    904:     if (out == NULL) return(0);
                    905:     /* the len parameter is ignored */
                    906:     if  (val >= 0x80) {
                    907:        return(xmlCopyCharMultiByte (out, val));
                    908:     }
                    909:     *out = (xmlChar) val;
                    910:     return 1;
                    911: }
                    912: 
                    913: /************************************************************************
                    914:  *                                                                     *
                    915:  *             Commodity functions to switch encodings                 *
                    916:  *                                                                     *
                    917:  ************************************************************************/
                    918: 
                    919: static int
                    920: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                    921:                        xmlCharEncodingHandlerPtr handler, int len);
                    922: static int
                    923: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                    924:                           xmlCharEncodingHandlerPtr handler, int len);
                    925: /**
                    926:  * xmlSwitchEncoding:
                    927:  * @ctxt:  the parser context
                    928:  * @enc:  the encoding value (number)
                    929:  *
                    930:  * change the input functions when discovering the character encoding
                    931:  * of a given entity.
                    932:  *
                    933:  * Returns 0 in case of success, -1 otherwise
                    934:  */
                    935: int
                    936: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
                    937: {
                    938:     xmlCharEncodingHandlerPtr handler;
                    939:     int len = -1;
                    940: 
                    941:     if (ctxt == NULL) return(-1);
                    942:     switch (enc) {
                    943:        case XML_CHAR_ENCODING_ERROR:
                    944:            __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
                    945:                           "encoding unknown\n", NULL, NULL);
                    946:            return(-1);
                    947:        case XML_CHAR_ENCODING_NONE:
                    948:            /* let's assume it's UTF-8 without the XML decl */
                    949:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    950:            return(0);
                    951:        case XML_CHAR_ENCODING_UTF8:
                    952:            /* default encoding, no conversion should be needed */
                    953:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    954: 
                    955:            /*
                    956:             * Errata on XML-1.0 June 20 2001
                    957:             * Specific handling of the Byte Order Mark for
                    958:             * UTF-8
                    959:             */
                    960:            if ((ctxt->input != NULL) &&
                    961:                (ctxt->input->cur[0] == 0xEF) &&
                    962:                (ctxt->input->cur[1] == 0xBB) &&
                    963:                (ctxt->input->cur[2] == 0xBF)) {
                    964:                ctxt->input->cur += 3;
                    965:            }
                    966:            return(0);
                    967:     case XML_CHAR_ENCODING_UTF16LE:
                    968:     case XML_CHAR_ENCODING_UTF16BE:
                    969:         /*The raw input characters are encoded
                    970:          *in UTF-16. As we expect this function
                    971:          *to be called after xmlCharEncInFunc, we expect
                    972:          *ctxt->input->cur to contain UTF-8 encoded characters.
                    973:          *So the raw UTF16 Byte Order Mark
                    974:          *has also been converted into
                    975:          *an UTF-8 BOM. Let's skip that BOM.
                    976:          */
                    977:         if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
                    978:             (ctxt->input->cur[0] == 0xEF) &&
                    979:             (ctxt->input->cur[1] == 0xBB) &&
                    980:             (ctxt->input->cur[2] == 0xBF)) {
                    981:             ctxt->input->cur += 3;
                    982:         }
                    983:         len = 90;
                    984:        break;
                    985:     case XML_CHAR_ENCODING_UCS2:
                    986:         len = 90;
                    987:        break;
                    988:     case XML_CHAR_ENCODING_UCS4BE:
                    989:     case XML_CHAR_ENCODING_UCS4LE:
                    990:     case XML_CHAR_ENCODING_UCS4_2143:
                    991:     case XML_CHAR_ENCODING_UCS4_3412:
                    992:         len = 180;
                    993:        break;
                    994:     case XML_CHAR_ENCODING_EBCDIC:
                    995:     case XML_CHAR_ENCODING_8859_1:
                    996:     case XML_CHAR_ENCODING_8859_2:
                    997:     case XML_CHAR_ENCODING_8859_3:
                    998:     case XML_CHAR_ENCODING_8859_4:
                    999:     case XML_CHAR_ENCODING_8859_5:
                   1000:     case XML_CHAR_ENCODING_8859_6:
                   1001:     case XML_CHAR_ENCODING_8859_7:
                   1002:     case XML_CHAR_ENCODING_8859_8:
                   1003:     case XML_CHAR_ENCODING_8859_9:
                   1004:     case XML_CHAR_ENCODING_ASCII:
                   1005:     case XML_CHAR_ENCODING_2022_JP:
                   1006:     case XML_CHAR_ENCODING_SHIFT_JIS:
                   1007:     case XML_CHAR_ENCODING_EUC_JP:
                   1008:         len = 45;
                   1009:        break;
                   1010:     }
                   1011:     handler = xmlGetCharEncodingHandler(enc);
                   1012:     if (handler == NULL) {
                   1013:        /*
                   1014:         * Default handlers.
                   1015:         */
                   1016:        switch (enc) {
                   1017:            case XML_CHAR_ENCODING_ASCII:
                   1018:                /* default encoding, no conversion should be needed */
                   1019:                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1020:                return(0);
                   1021:            case XML_CHAR_ENCODING_UTF16LE:
                   1022:                break;
                   1023:            case XML_CHAR_ENCODING_UTF16BE:
                   1024:                break;
                   1025:            case XML_CHAR_ENCODING_UCS4LE:
                   1026:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1027:                               "encoding not supported %s\n",
                   1028:                               BAD_CAST "USC4 little endian", NULL);
                   1029:                break;
                   1030:            case XML_CHAR_ENCODING_UCS4BE:
                   1031:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1032:                               "encoding not supported %s\n",
                   1033:                               BAD_CAST "USC4 big endian", NULL);
                   1034:                break;
                   1035:            case XML_CHAR_ENCODING_EBCDIC:
                   1036:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1037:                               "encoding not supported %s\n",
                   1038:                               BAD_CAST "EBCDIC", NULL);
                   1039:                break;
                   1040:            case XML_CHAR_ENCODING_UCS4_2143:
                   1041:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1042:                               "encoding not supported %s\n",
                   1043:                               BAD_CAST "UCS4 2143", NULL);
                   1044:                break;
                   1045:            case XML_CHAR_ENCODING_UCS4_3412:
                   1046:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1047:                               "encoding not supported %s\n",
                   1048:                               BAD_CAST "UCS4 3412", NULL);
                   1049:                break;
                   1050:            case XML_CHAR_ENCODING_UCS2:
                   1051:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1052:                               "encoding not supported %s\n",
                   1053:                               BAD_CAST "UCS2", NULL);
                   1054:                break;
                   1055:            case XML_CHAR_ENCODING_8859_1:
                   1056:            case XML_CHAR_ENCODING_8859_2:
                   1057:            case XML_CHAR_ENCODING_8859_3:
                   1058:            case XML_CHAR_ENCODING_8859_4:
                   1059:            case XML_CHAR_ENCODING_8859_5:
                   1060:            case XML_CHAR_ENCODING_8859_6:
                   1061:            case XML_CHAR_ENCODING_8859_7:
                   1062:            case XML_CHAR_ENCODING_8859_8:
                   1063:            case XML_CHAR_ENCODING_8859_9:
                   1064:                /*
                   1065:                 * We used to keep the internal content in the
                   1066:                 * document encoding however this turns being unmaintainable
                   1067:                 * So xmlGetCharEncodingHandler() will return non-null
                   1068:                 * values for this now.
                   1069:                 */
                   1070:                if ((ctxt->inputNr == 1) &&
                   1071:                    (ctxt->encoding == NULL) &&
                   1072:                    (ctxt->input != NULL) &&
                   1073:                    (ctxt->input->encoding != NULL)) {
                   1074:                    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                   1075:                }
                   1076:                ctxt->charset = enc;
                   1077:                return(0);
                   1078:            case XML_CHAR_ENCODING_2022_JP:
                   1079:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1080:                               "encoding not supported %s\n",
                   1081:                               BAD_CAST "ISO-2022-JP", NULL);
                   1082:                break;
                   1083:            case XML_CHAR_ENCODING_SHIFT_JIS:
                   1084:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1085:                               "encoding not supported %s\n",
                   1086:                               BAD_CAST "Shift_JIS", NULL);
                   1087:                break;
                   1088:            case XML_CHAR_ENCODING_EUC_JP:
                   1089:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1090:                               "encoding not supported %s\n",
                   1091:                               BAD_CAST "EUC-JP", NULL);
                   1092:                break;
                   1093:            default:
                   1094:                break;
                   1095:        }
                   1096:     }
                   1097:     if (handler == NULL)
                   1098:        return(-1);
                   1099:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1100:     return(xmlSwitchToEncodingInt(ctxt, handler, len));
                   1101: }
                   1102: 
                   1103: /**
                   1104:  * xmlSwitchInputEncoding:
                   1105:  * @ctxt:  the parser context
                   1106:  * @input:  the input stream
                   1107:  * @handler:  the encoding handler
                   1108:  * @len:  the number of bytes to convert for the first line or -1
                   1109:  *
                   1110:  * change the input functions when discovering the character encoding
                   1111:  * of a given entity.
                   1112:  *
                   1113:  * Returns 0 in case of success, -1 otherwise
                   1114:  */
                   1115: static int
                   1116: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                   1117:                           xmlCharEncodingHandlerPtr handler, int len)
                   1118: {
                   1119:     int nbchars;
                   1120: 
                   1121:     if (handler == NULL)
                   1122:         return (-1);
                   1123:     if (input == NULL)
                   1124:         return (-1);
                   1125:     if (input->buf != NULL) {
                   1126:         if (input->buf->encoder != NULL) {
                   1127:             /*
                   1128:              * Check in case the auto encoding detetection triggered
                   1129:              * in already.
                   1130:              */
                   1131:             if (input->buf->encoder == handler)
                   1132:                 return (0);
                   1133: 
                   1134:             /*
                   1135:              * "UTF-16" can be used for both LE and BE
                   1136:              if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
                   1137:              BAD_CAST "UTF-16", 6)) &&
                   1138:              (!xmlStrncmp(BAD_CAST handler->name,
                   1139:              BAD_CAST "UTF-16", 6))) {
                   1140:              return(0);
                   1141:              }
                   1142:              */
                   1143: 
                   1144:             /*
                   1145:              * Note: this is a bit dangerous, but that's what it
                   1146:              * takes to use nearly compatible signature for different
                   1147:              * encodings.
                   1148:              */
                   1149:             xmlCharEncCloseFunc(input->buf->encoder);
                   1150:             input->buf->encoder = handler;
                   1151:             return (0);
                   1152:         }
                   1153:         input->buf->encoder = handler;
                   1154: 
                   1155:         /*
                   1156:          * Is there already some content down the pipe to convert ?
                   1157:          */
1.1.1.3 ! misho    1158:         if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1.1       misho    1159:             int processed;
                   1160:            unsigned int use;
                   1161: 
                   1162:             /*
1.1.1.3 ! misho    1163:              * Specific handling of the Byte Order Mark for
1.1       misho    1164:              * UTF-16
                   1165:              */
                   1166:             if ((handler->name != NULL) &&
                   1167:                 (!strcmp(handler->name, "UTF-16LE") ||
                   1168:                  !strcmp(handler->name, "UTF-16")) &&
                   1169:                 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
                   1170:                 input->cur += 2;
                   1171:             }
                   1172:             if ((handler->name != NULL) &&
                   1173:                 (!strcmp(handler->name, "UTF-16BE")) &&
                   1174:                 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
                   1175:                 input->cur += 2;
                   1176:             }
                   1177:             /*
                   1178:              * Errata on XML-1.0 June 20 2001
                   1179:              * Specific handling of the Byte Order Mark for
                   1180:              * UTF-8
                   1181:              */
                   1182:             if ((handler->name != NULL) &&
                   1183:                 (!strcmp(handler->name, "UTF-8")) &&
                   1184:                 (input->cur[0] == 0xEF) &&
                   1185:                 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
                   1186:                 input->cur += 3;
                   1187:             }
                   1188: 
                   1189:             /*
                   1190:              * Shrink the current input buffer.
                   1191:              * Move it as the raw buffer and create a new input buffer
                   1192:              */
                   1193:             processed = input->cur - input->base;
1.1.1.3 ! misho    1194:             xmlBufShrink(input->buf->buffer, processed);
1.1       misho    1195:             input->buf->raw = input->buf->buffer;
1.1.1.3 ! misho    1196:             input->buf->buffer = xmlBufCreate();
1.1       misho    1197:            input->buf->rawconsumed = processed;
1.1.1.3 ! misho    1198:            use = xmlBufUse(input->buf->raw);
1.1       misho    1199: 
                   1200:             if (ctxt->html) {
                   1201:                 /*
                   1202:                  * convert as much as possible of the buffer
                   1203:                  */
1.1.1.3 ! misho    1204:                 nbchars = xmlCharEncInput(input->buf, 1);
1.1       misho    1205:             } else {
                   1206:                 /*
                   1207:                  * convert just enough to get
                   1208:                  * '<?xml version="1.0" encoding="xxx"?>'
                   1209:                  * parsed with the autodetected encoding
                   1210:                  * into the parser reading buffer.
                   1211:                  */
1.1.1.3 ! misho    1212:                 nbchars = xmlCharEncFirstLineInput(input->buf, len);
1.1       misho    1213:             }
                   1214:             if (nbchars < 0) {
                   1215:                 xmlErrInternal(ctxt,
                   1216:                                "switching encoding: encoder error\n",
                   1217:                                NULL);
                   1218:                 return (-1);
                   1219:             }
1.1.1.3 ! misho    1220:            input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
        !          1221:             xmlBufResetInput(input->buf->buffer, input);
1.1       misho    1222:         }
                   1223:         return (0);
                   1224:     } else if (input->length == 0) {
                   1225:        /*
                   1226:         * When parsing a static memory array one must know the
                   1227:         * size to be able to convert the buffer.
                   1228:         */
                   1229:        xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
                   1230:        return (-1);
                   1231:     }
                   1232:     return (0);
                   1233: }
                   1234: 
                   1235: /**
                   1236:  * xmlSwitchInputEncoding:
                   1237:  * @ctxt:  the parser context
                   1238:  * @input:  the input stream
                   1239:  * @handler:  the encoding handler
                   1240:  *
                   1241:  * change the input functions when discovering the character encoding
                   1242:  * of a given entity.
                   1243:  *
                   1244:  * Returns 0 in case of success, -1 otherwise
                   1245:  */
                   1246: int
                   1247: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                   1248:                           xmlCharEncodingHandlerPtr handler) {
                   1249:     return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
                   1250: }
                   1251: 
                   1252: /**
                   1253:  * xmlSwitchToEncodingInt:
                   1254:  * @ctxt:  the parser context
                   1255:  * @handler:  the encoding handler
1.1.1.3 ! misho    1256:  * @len: the length to convert or -1
1.1       misho    1257:  *
                   1258:  * change the input functions when discovering the character encoding
                   1259:  * of a given entity, and convert only @len bytes of the output, this
                   1260:  * is needed on auto detect to allows any declared encoding later to
                   1261:  * convert the actual content after the xmlDecl
                   1262:  *
                   1263:  * Returns 0 in case of success, -1 otherwise
                   1264:  */
                   1265: static int
                   1266: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                   1267:                        xmlCharEncodingHandlerPtr handler, int len) {
                   1268:     int ret = 0;
                   1269: 
                   1270:     if (handler != NULL) {
                   1271:         if (ctxt->input != NULL) {
                   1272:            ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
                   1273:        } else {
                   1274:            xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
                   1275:                           NULL);
                   1276:            return(-1);
                   1277:        }
                   1278:        /*
                   1279:         * The parsing is now done in UTF8 natively
                   1280:         */
                   1281:        ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1282:     } else
                   1283:        return(-1);
                   1284:     return(ret);
                   1285: }
                   1286: 
                   1287: /**
                   1288:  * xmlSwitchToEncoding:
                   1289:  * @ctxt:  the parser context
                   1290:  * @handler:  the encoding handler
                   1291:  *
                   1292:  * change the input functions when discovering the character encoding
                   1293:  * of a given entity.
                   1294:  *
                   1295:  * Returns 0 in case of success, -1 otherwise
                   1296:  */
                   1297: int
1.1.1.3 ! misho    1298: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1.1       misho    1299: {
                   1300:     return (xmlSwitchToEncodingInt(ctxt, handler, -1));
                   1301: }
                   1302: 
                   1303: /************************************************************************
                   1304:  *                                                                     *
                   1305:  *     Commodity functions to handle entities processing               *
                   1306:  *                                                                     *
                   1307:  ************************************************************************/
                   1308: 
                   1309: /**
                   1310:  * xmlFreeInputStream:
                   1311:  * @input:  an xmlParserInputPtr
                   1312:  *
                   1313:  * Free up an input stream.
                   1314:  */
                   1315: void
                   1316: xmlFreeInputStream(xmlParserInputPtr input) {
                   1317:     if (input == NULL) return;
                   1318: 
                   1319:     if (input->filename != NULL) xmlFree((char *) input->filename);
                   1320:     if (input->directory != NULL) xmlFree((char *) input->directory);
                   1321:     if (input->encoding != NULL) xmlFree((char *) input->encoding);
                   1322:     if (input->version != NULL) xmlFree((char *) input->version);
                   1323:     if ((input->free != NULL) && (input->base != NULL))
                   1324:         input->free((xmlChar *) input->base);
1.1.1.3 ! misho    1325:     if (input->buf != NULL)
1.1       misho    1326:         xmlFreeParserInputBuffer(input->buf);
                   1327:     xmlFree(input);
                   1328: }
                   1329: 
                   1330: /**
                   1331:  * xmlNewInputStream:
                   1332:  * @ctxt:  an XML parser context
                   1333:  *
1.1.1.2   misho    1334:  * Create a new input stream structure.
                   1335:  *
1.1       misho    1336:  * Returns the new input stream or NULL
                   1337:  */
                   1338: xmlParserInputPtr
                   1339: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
                   1340:     xmlParserInputPtr input;
                   1341: 
                   1342:     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
                   1343:     if (input == NULL) {
                   1344:         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
                   1345:        return(NULL);
                   1346:     }
                   1347:     memset(input, 0, sizeof(xmlParserInput));
                   1348:     input->line = 1;
                   1349:     input->col = 1;
                   1350:     input->standalone = -1;
1.1.1.2   misho    1351: 
1.1       misho    1352:     /*
1.1.1.2   misho    1353:      * If the context is NULL the id cannot be initialized, but that
                   1354:      * should not happen while parsing which is the situation where
                   1355:      * the id is actually needed.
1.1       misho    1356:      */
1.1.1.2   misho    1357:     if (ctxt != NULL)
                   1358:         input->id = ctxt->input_id++;
                   1359: 
1.1       misho    1360:     return(input);
                   1361: }
                   1362: 
                   1363: /**
                   1364:  * xmlNewIOInputStream:
                   1365:  * @ctxt:  an XML parser context
                   1366:  * @input:  an I/O Input
                   1367:  * @enc:  the charset encoding if known
                   1368:  *
                   1369:  * Create a new input stream structure encapsulating the @input into
                   1370:  * a stream suitable for the parser.
                   1371:  *
                   1372:  * Returns the new input stream or NULL
                   1373:  */
                   1374: xmlParserInputPtr
                   1375: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
                   1376:                    xmlCharEncoding enc) {
                   1377:     xmlParserInputPtr inputStream;
                   1378: 
                   1379:     if (input == NULL) return(NULL);
                   1380:     if (xmlParserDebugEntities)
                   1381:        xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
                   1382:     inputStream = xmlNewInputStream(ctxt);
                   1383:     if (inputStream == NULL) {
                   1384:        return(NULL);
                   1385:     }
                   1386:     inputStream->filename = NULL;
                   1387:     inputStream->buf = input;
1.1.1.3 ! misho    1388:     xmlBufResetInput(inputStream->buf->buffer, inputStream);
        !          1389: 
1.1       misho    1390:     if (enc != XML_CHAR_ENCODING_NONE) {
                   1391:         xmlSwitchEncoding(ctxt, enc);
                   1392:     }
                   1393: 
                   1394:     return(inputStream);
                   1395: }
                   1396: 
                   1397: /**
                   1398:  * xmlNewEntityInputStream:
                   1399:  * @ctxt:  an XML parser context
                   1400:  * @entity:  an Entity pointer
                   1401:  *
                   1402:  * Create a new input stream based on an xmlEntityPtr
                   1403:  *
                   1404:  * Returns the new input stream or NULL
                   1405:  */
                   1406: xmlParserInputPtr
                   1407: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
                   1408:     xmlParserInputPtr input;
                   1409: 
                   1410:     if (entity == NULL) {
                   1411:         xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
                   1412:                       NULL);
                   1413:        return(NULL);
                   1414:     }
                   1415:     if (xmlParserDebugEntities)
                   1416:        xmlGenericError(xmlGenericErrorContext,
                   1417:                "new input from entity: %s\n", entity->name);
                   1418:     if (entity->content == NULL) {
                   1419:        switch (entity->etype) {
                   1420:             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
                   1421:                xmlErrInternal(ctxt, "Cannot parse entity %s\n",
                   1422:                               entity->name);
                   1423:                 break;
                   1424:             case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
                   1425:             case XML_EXTERNAL_PARAMETER_ENTITY:
                   1426:                return(xmlLoadExternalEntity((char *) entity->URI,
                   1427:                       (char *) entity->ExternalID, ctxt));
                   1428:             case XML_INTERNAL_GENERAL_ENTITY:
                   1429:                xmlErrInternal(ctxt,
                   1430:                      "Internal entity %s without content !\n",
                   1431:                               entity->name);
                   1432:                 break;
                   1433:             case XML_INTERNAL_PARAMETER_ENTITY:
                   1434:                xmlErrInternal(ctxt,
                   1435:                      "Internal parameter entity %s without content !\n",
                   1436:                               entity->name);
                   1437:                 break;
                   1438:             case XML_INTERNAL_PREDEFINED_ENTITY:
                   1439:                xmlErrInternal(ctxt,
                   1440:                      "Predefined entity %s without content !\n",
                   1441:                               entity->name);
                   1442:                 break;
                   1443:        }
                   1444:        return(NULL);
                   1445:     }
                   1446:     input = xmlNewInputStream(ctxt);
                   1447:     if (input == NULL) {
                   1448:        return(NULL);
                   1449:     }
                   1450:     if (entity->URI != NULL)
                   1451:        input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
                   1452:     input->base = entity->content;
                   1453:     input->cur = entity->content;
                   1454:     input->length = entity->length;
                   1455:     input->end = &entity->content[input->length];
                   1456:     return(input);
                   1457: }
                   1458: 
                   1459: /**
                   1460:  * xmlNewStringInputStream:
                   1461:  * @ctxt:  an XML parser context
                   1462:  * @buffer:  an memory buffer
                   1463:  *
                   1464:  * Create a new input stream based on a memory buffer.
                   1465:  * Returns the new input stream
                   1466:  */
                   1467: xmlParserInputPtr
                   1468: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
                   1469:     xmlParserInputPtr input;
                   1470: 
                   1471:     if (buffer == NULL) {
                   1472:         xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
                   1473:                       NULL);
                   1474:        return(NULL);
                   1475:     }
                   1476:     if (xmlParserDebugEntities)
                   1477:        xmlGenericError(xmlGenericErrorContext,
                   1478:                "new fixed input: %.30s\n", buffer);
                   1479:     input = xmlNewInputStream(ctxt);
                   1480:     if (input == NULL) {
                   1481:         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
                   1482:        return(NULL);
                   1483:     }
                   1484:     input->base = buffer;
                   1485:     input->cur = buffer;
                   1486:     input->length = xmlStrlen(buffer);
                   1487:     input->end = &buffer[input->length];
                   1488:     return(input);
                   1489: }
                   1490: 
                   1491: /**
                   1492:  * xmlNewInputFromFile:
                   1493:  * @ctxt:  an XML parser context
                   1494:  * @filename:  the filename to use as entity
                   1495:  *
                   1496:  * Create a new input stream based on a file or an URL.
                   1497:  *
                   1498:  * Returns the new input stream or NULL in case of error
                   1499:  */
                   1500: xmlParserInputPtr
                   1501: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
                   1502:     xmlParserInputBufferPtr buf;
                   1503:     xmlParserInputPtr inputStream;
                   1504:     char *directory = NULL;
                   1505:     xmlChar *URI = NULL;
                   1506: 
                   1507:     if (xmlParserDebugEntities)
                   1508:        xmlGenericError(xmlGenericErrorContext,
                   1509:                "new input from file: %s\n", filename);
                   1510:     if (ctxt == NULL) return(NULL);
                   1511:     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
                   1512:     if (buf == NULL) {
                   1513:        if (filename == NULL)
                   1514:            __xmlLoaderErr(ctxt,
                   1515:                           "failed to load external entity: NULL filename \n",
                   1516:                           NULL);
                   1517:        else
                   1518:            __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
                   1519:                           (const char *) filename);
                   1520:        return(NULL);
                   1521:     }
                   1522: 
                   1523:     inputStream = xmlNewInputStream(ctxt);
                   1524:     if (inputStream == NULL)
                   1525:        return(NULL);
                   1526: 
                   1527:     inputStream->buf = buf;
                   1528:     inputStream = xmlCheckHTTPInput(ctxt, inputStream);
                   1529:     if (inputStream == NULL)
                   1530:         return(NULL);
1.1.1.3 ! misho    1531: 
1.1       misho    1532:     if (inputStream->filename == NULL)
                   1533:        URI = xmlStrdup((xmlChar *) filename);
                   1534:     else
                   1535:        URI = xmlStrdup((xmlChar *) inputStream->filename);
                   1536:     directory = xmlParserGetDirectory((const char *) URI);
                   1537:     if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
                   1538:     inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
                   1539:     if (URI != NULL) xmlFree((char *) URI);
                   1540:     inputStream->directory = directory;
                   1541: 
1.1.1.3 ! misho    1542:     xmlBufResetInput(inputStream->buf->buffer, inputStream);
1.1       misho    1543:     if ((ctxt->directory == NULL) && (directory != NULL))
                   1544:         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
                   1545:     return(inputStream);
                   1546: }
                   1547: 
                   1548: /************************************************************************
                   1549:  *                                                                     *
                   1550:  *             Commodity functions to handle parser contexts           *
                   1551:  *                                                                     *
                   1552:  ************************************************************************/
                   1553: 
                   1554: /**
                   1555:  * xmlInitParserCtxt:
                   1556:  * @ctxt:  an XML parser context
                   1557:  *
                   1558:  * Initialize a parser context
                   1559:  *
                   1560:  * Returns 0 in case of success and -1 in case of error
                   1561:  */
                   1562: 
                   1563: int
                   1564: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
                   1565: {
                   1566:     xmlParserInputPtr input;
                   1567: 
                   1568:     if(ctxt==NULL) {
                   1569:         xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
                   1570:         return(-1);
                   1571:     }
                   1572: 
                   1573:     xmlDefaultSAXHandlerInit();
                   1574: 
                   1575:     if (ctxt->dict == NULL)
                   1576:        ctxt->dict = xmlDictCreate();
                   1577:     if (ctxt->dict == NULL) {
                   1578:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1579:        return(-1);
                   1580:     }
1.1.1.3 ! misho    1581:     xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
        !          1582: 
1.1       misho    1583:     if (ctxt->sax == NULL)
                   1584:        ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
                   1585:     if (ctxt->sax == NULL) {
                   1586:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1587:        return(-1);
                   1588:     }
                   1589:     else
                   1590:         xmlSAXVersion(ctxt->sax, 2);
                   1591: 
                   1592:     ctxt->maxatts = 0;
                   1593:     ctxt->atts = NULL;
                   1594:     /* Allocate the Input stack */
                   1595:     if (ctxt->inputTab == NULL) {
                   1596:        ctxt->inputTab = (xmlParserInputPtr *)
                   1597:                    xmlMalloc(5 * sizeof(xmlParserInputPtr));
                   1598:        ctxt->inputMax = 5;
                   1599:     }
                   1600:     if (ctxt->inputTab == NULL) {
                   1601:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1602:        ctxt->inputNr = 0;
                   1603:        ctxt->inputMax = 0;
                   1604:        ctxt->input = NULL;
                   1605:        return(-1);
                   1606:     }
                   1607:     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
                   1608:         xmlFreeInputStream(input);
                   1609:     }
                   1610:     ctxt->inputNr = 0;
                   1611:     ctxt->input = NULL;
                   1612: 
                   1613:     ctxt->version = NULL;
                   1614:     ctxt->encoding = NULL;
                   1615:     ctxt->standalone = -1;
                   1616:     ctxt->hasExternalSubset = 0;
                   1617:     ctxt->hasPErefs = 0;
                   1618:     ctxt->html = 0;
                   1619:     ctxt->external = 0;
                   1620:     ctxt->instate = XML_PARSER_START;
                   1621:     ctxt->token = 0;
                   1622:     ctxt->directory = NULL;
                   1623: 
                   1624:     /* Allocate the Node stack */
                   1625:     if (ctxt->nodeTab == NULL) {
                   1626:        ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
                   1627:        ctxt->nodeMax = 10;
                   1628:     }
                   1629:     if (ctxt->nodeTab == NULL) {
                   1630:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1631:        ctxt->nodeNr = 0;
                   1632:        ctxt->nodeMax = 0;
                   1633:        ctxt->node = NULL;
                   1634:        ctxt->inputNr = 0;
                   1635:        ctxt->inputMax = 0;
                   1636:        ctxt->input = NULL;
                   1637:        return(-1);
                   1638:     }
                   1639:     ctxt->nodeNr = 0;
                   1640:     ctxt->node = NULL;
                   1641: 
                   1642:     /* Allocate the Name stack */
                   1643:     if (ctxt->nameTab == NULL) {
                   1644:        ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
                   1645:        ctxt->nameMax = 10;
                   1646:     }
                   1647:     if (ctxt->nameTab == NULL) {
                   1648:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1649:        ctxt->nodeNr = 0;
                   1650:        ctxt->nodeMax = 0;
                   1651:        ctxt->node = NULL;
                   1652:        ctxt->inputNr = 0;
                   1653:        ctxt->inputMax = 0;
                   1654:        ctxt->input = NULL;
                   1655:        ctxt->nameNr = 0;
                   1656:        ctxt->nameMax = 0;
                   1657:        ctxt->name = NULL;
                   1658:        return(-1);
                   1659:     }
                   1660:     ctxt->nameNr = 0;
                   1661:     ctxt->name = NULL;
                   1662: 
                   1663:     /* Allocate the space stack */
                   1664:     if (ctxt->spaceTab == NULL) {
                   1665:        ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
                   1666:        ctxt->spaceMax = 10;
                   1667:     }
                   1668:     if (ctxt->spaceTab == NULL) {
                   1669:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1670:        ctxt->nodeNr = 0;
                   1671:        ctxt->nodeMax = 0;
                   1672:        ctxt->node = NULL;
                   1673:        ctxt->inputNr = 0;
                   1674:        ctxt->inputMax = 0;
                   1675:        ctxt->input = NULL;
                   1676:        ctxt->nameNr = 0;
                   1677:        ctxt->nameMax = 0;
                   1678:        ctxt->name = NULL;
                   1679:        ctxt->spaceNr = 0;
                   1680:        ctxt->spaceMax = 0;
                   1681:        ctxt->space = NULL;
                   1682:        return(-1);
                   1683:     }
                   1684:     ctxt->spaceNr = 1;
                   1685:     ctxt->spaceMax = 10;
                   1686:     ctxt->spaceTab[0] = -1;
                   1687:     ctxt->space = &ctxt->spaceTab[0];
                   1688:     ctxt->userData = ctxt;
                   1689:     ctxt->myDoc = NULL;
                   1690:     ctxt->wellFormed = 1;
                   1691:     ctxt->nsWellFormed = 1;
                   1692:     ctxt->valid = 1;
                   1693:     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
                   1694:     ctxt->validate = xmlDoValidityCheckingDefaultValue;
                   1695:     ctxt->pedantic = xmlPedanticParserDefaultValue;
                   1696:     ctxt->linenumbers = xmlLineNumbersDefaultValue;
                   1697:     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
                   1698:     if (ctxt->keepBlanks == 0)
                   1699:        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
                   1700: 
                   1701:     ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
                   1702:     ctxt->vctxt.userData = ctxt;
                   1703:     ctxt->vctxt.error = xmlParserValidityError;
                   1704:     ctxt->vctxt.warning = xmlParserValidityWarning;
                   1705:     if (ctxt->validate) {
                   1706:        if (xmlGetWarningsDefaultValue == 0)
                   1707:            ctxt->vctxt.warning = NULL;
                   1708:        else
                   1709:            ctxt->vctxt.warning = xmlParserValidityWarning;
                   1710:        ctxt->vctxt.nodeMax = 0;
                   1711:     }
                   1712:     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
                   1713:     ctxt->record_info = 0;
                   1714:     ctxt->nbChars = 0;
                   1715:     ctxt->checkIndex = 0;
                   1716:     ctxt->inSubset = 0;
                   1717:     ctxt->errNo = XML_ERR_OK;
                   1718:     ctxt->depth = 0;
                   1719:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1720:     ctxt->catalogs = NULL;
                   1721:     ctxt->nbentities = 0;
1.1.1.3 ! misho    1722:     ctxt->sizeentities = 0;
        !          1723:     ctxt->sizeentcopy = 0;
1.1.1.2   misho    1724:     ctxt->input_id = 1;
1.1       misho    1725:     xmlInitNodeInfoSeq(&ctxt->node_seq);
                   1726:     return(0);
                   1727: }
                   1728: 
                   1729: /**
                   1730:  * xmlFreeParserCtxt:
                   1731:  * @ctxt:  an XML parser context
                   1732:  *
                   1733:  * Free all the memory used by a parser context. However the parsed
                   1734:  * document in ctxt->myDoc is not freed.
                   1735:  */
                   1736: 
                   1737: void
                   1738: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
                   1739: {
                   1740:     xmlParserInputPtr input;
                   1741: 
                   1742:     if (ctxt == NULL) return;
                   1743: 
                   1744:     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
                   1745:         xmlFreeInputStream(input);
                   1746:     }
                   1747:     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
                   1748:     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
                   1749:     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
                   1750:     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
                   1751:     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
                   1752:     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
                   1753:     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
                   1754:     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
                   1755:     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
                   1756: #ifdef LIBXML_SAX1_ENABLED
                   1757:     if ((ctxt->sax != NULL) &&
                   1758:         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
                   1759: #else
                   1760:     if (ctxt->sax != NULL)
                   1761: #endif /* LIBXML_SAX1_ENABLED */
                   1762:         xmlFree(ctxt->sax);
                   1763:     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
                   1764:     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
                   1765:     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
                   1766:     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
                   1767:     if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
                   1768:     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
                   1769:     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1.1.1.3 ! misho    1770:     if (ctxt->attsDefault != NULL)
1.1       misho    1771:         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
                   1772:     if (ctxt->attsSpecial != NULL)
                   1773:         xmlHashFree(ctxt->attsSpecial, NULL);
                   1774:     if (ctxt->freeElems != NULL) {
                   1775:         xmlNodePtr cur, next;
                   1776: 
                   1777:        cur = ctxt->freeElems;
                   1778:        while (cur != NULL) {
                   1779:            next = cur->next;
                   1780:            xmlFree(cur);
                   1781:            cur = next;
                   1782:        }
                   1783:     }
                   1784:     if (ctxt->freeAttrs != NULL) {
                   1785:         xmlAttrPtr cur, next;
                   1786: 
                   1787:        cur = ctxt->freeAttrs;
                   1788:        while (cur != NULL) {
                   1789:            next = cur->next;
                   1790:            xmlFree(cur);
                   1791:            cur = next;
                   1792:        }
                   1793:     }
                   1794:     /*
                   1795:      * cleanup the error strings
                   1796:      */
                   1797:     if (ctxt->lastError.message != NULL)
                   1798:         xmlFree(ctxt->lastError.message);
                   1799:     if (ctxt->lastError.file != NULL)
                   1800:         xmlFree(ctxt->lastError.file);
                   1801:     if (ctxt->lastError.str1 != NULL)
                   1802:         xmlFree(ctxt->lastError.str1);
                   1803:     if (ctxt->lastError.str2 != NULL)
                   1804:         xmlFree(ctxt->lastError.str2);
                   1805:     if (ctxt->lastError.str3 != NULL)
                   1806:         xmlFree(ctxt->lastError.str3);
                   1807: 
                   1808: #ifdef LIBXML_CATALOG_ENABLED
                   1809:     if (ctxt->catalogs != NULL)
                   1810:        xmlCatalogFreeLocal(ctxt->catalogs);
                   1811: #endif
                   1812:     xmlFree(ctxt);
                   1813: }
                   1814: 
                   1815: /**
                   1816:  * xmlNewParserCtxt:
                   1817:  *
                   1818:  * Allocate and initialize a new parser context.
                   1819:  *
                   1820:  * Returns the xmlParserCtxtPtr or NULL
                   1821:  */
                   1822: 
                   1823: xmlParserCtxtPtr
                   1824: xmlNewParserCtxt(void)
                   1825: {
                   1826:     xmlParserCtxtPtr ctxt;
                   1827: 
                   1828:     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
                   1829:     if (ctxt == NULL) {
                   1830:        xmlErrMemory(NULL, "cannot allocate parser context\n");
                   1831:        return(NULL);
                   1832:     }
                   1833:     memset(ctxt, 0, sizeof(xmlParserCtxt));
                   1834:     if (xmlInitParserCtxt(ctxt) < 0) {
                   1835:         xmlFreeParserCtxt(ctxt);
                   1836:        return(NULL);
                   1837:     }
                   1838:     return(ctxt);
                   1839: }
                   1840: 
                   1841: /************************************************************************
                   1842:  *                                                                     *
                   1843:  *             Handling of node information                            *
                   1844:  *                                                                     *
                   1845:  ************************************************************************/
                   1846: 
                   1847: /**
                   1848:  * xmlClearParserCtxt:
                   1849:  * @ctxt:  an XML parser context
                   1850:  *
                   1851:  * Clear (release owned resources) and reinitialize a parser context
                   1852:  */
                   1853: 
                   1854: void
                   1855: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
                   1856: {
                   1857:   if (ctxt==NULL)
                   1858:     return;
                   1859:   xmlClearNodeInfoSeq(&ctxt->node_seq);
                   1860:   xmlCtxtReset(ctxt);
                   1861: }
                   1862: 
                   1863: 
                   1864: /**
                   1865:  * xmlParserFindNodeInfo:
                   1866:  * @ctx:  an XML parser context
                   1867:  * @node:  an XML node within the tree
                   1868:  *
                   1869:  * Find the parser node info struct for a given node
1.1.1.3 ! misho    1870:  *
1.1       misho    1871:  * Returns an xmlParserNodeInfo block pointer or NULL
                   1872:  */
                   1873: const xmlParserNodeInfo *
                   1874: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
                   1875: {
                   1876:     unsigned long pos;
                   1877: 
                   1878:     if ((ctx == NULL) || (node == NULL))
                   1879:         return (NULL);
                   1880:     /* Find position where node should be at */
                   1881:     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
                   1882:     if (pos < ctx->node_seq.length
                   1883:         && ctx->node_seq.buffer[pos].node == node)
                   1884:         return &ctx->node_seq.buffer[pos];
                   1885:     else
                   1886:         return NULL;
                   1887: }
                   1888: 
                   1889: 
                   1890: /**
                   1891:  * xmlInitNodeInfoSeq:
                   1892:  * @seq:  a node info sequence pointer
                   1893:  *
                   1894:  * -- Initialize (set to initial state) node info sequence
                   1895:  */
                   1896: void
                   1897: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
                   1898: {
                   1899:     if (seq == NULL)
                   1900:         return;
                   1901:     seq->length = 0;
                   1902:     seq->maximum = 0;
                   1903:     seq->buffer = NULL;
                   1904: }
                   1905: 
                   1906: /**
                   1907:  * xmlClearNodeInfoSeq:
                   1908:  * @seq:  a node info sequence pointer
                   1909:  *
                   1910:  * -- Clear (release memory and reinitialize) node
                   1911:  *   info sequence
                   1912:  */
                   1913: void
                   1914: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
                   1915: {
                   1916:     if (seq == NULL)
                   1917:         return;
                   1918:     if (seq->buffer != NULL)
                   1919:         xmlFree(seq->buffer);
                   1920:     xmlInitNodeInfoSeq(seq);
                   1921: }
                   1922: 
                   1923: /**
                   1924:  * xmlParserFindNodeInfoIndex:
                   1925:  * @seq:  a node info sequence pointer
                   1926:  * @node:  an XML node pointer
                   1927:  *
1.1.1.3 ! misho    1928:  *
1.1       misho    1929:  * xmlParserFindNodeInfoIndex : Find the index that the info record for
                   1930:  *   the given node is or should be at in a sorted sequence
                   1931:  *
                   1932:  * Returns a long indicating the position of the record
                   1933:  */
                   1934: unsigned long
                   1935: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
                   1936:                            const xmlNodePtr node)
                   1937: {
                   1938:     unsigned long upper, lower, middle;
                   1939:     int found = 0;
                   1940: 
                   1941:     if ((seq == NULL) || (node == NULL))
                   1942:         return ((unsigned long) -1);
                   1943: 
                   1944:     /* Do a binary search for the key */
                   1945:     lower = 1;
                   1946:     upper = seq->length;
                   1947:     middle = 0;
                   1948:     while (lower <= upper && !found) {
                   1949:         middle = lower + (upper - lower) / 2;
                   1950:         if (node == seq->buffer[middle - 1].node)
                   1951:             found = 1;
                   1952:         else if (node < seq->buffer[middle - 1].node)
                   1953:             upper = middle - 1;
                   1954:         else
                   1955:             lower = middle + 1;
                   1956:     }
                   1957: 
                   1958:     /* Return position */
                   1959:     if (middle == 0 || seq->buffer[middle - 1].node < node)
                   1960:         return middle;
                   1961:     else
                   1962:         return middle - 1;
                   1963: }
                   1964: 
                   1965: 
                   1966: /**
                   1967:  * xmlParserAddNodeInfo:
                   1968:  * @ctxt:  an XML parser context
                   1969:  * @info:  a node info sequence pointer
                   1970:  *
                   1971:  * Insert node info record into the sorted sequence
                   1972:  */
                   1973: void
                   1974: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
                   1975:                      const xmlParserNodeInfoPtr info)
                   1976: {
                   1977:     unsigned long pos;
                   1978: 
                   1979:     if ((ctxt == NULL) || (info == NULL)) return;
                   1980: 
                   1981:     /* Find pos and check to see if node is already in the sequence */
                   1982:     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
                   1983:                                      info->node);
                   1984: 
1.1.1.3 ! misho    1985:     if ((pos < ctxt->node_seq.length) &&
1.1       misho    1986:         (ctxt->node_seq.buffer != NULL) &&
                   1987:         (ctxt->node_seq.buffer[pos].node == info->node)) {
                   1988:         ctxt->node_seq.buffer[pos] = *info;
                   1989:     }
                   1990: 
                   1991:     /* Otherwise, we need to add new node to buffer */
                   1992:     else {
                   1993:         if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
                   1994:             xmlParserNodeInfo *tmp_buffer;
                   1995:             unsigned int byte_size;
                   1996: 
                   1997:             if (ctxt->node_seq.maximum == 0)
                   1998:                 ctxt->node_seq.maximum = 2;
                   1999:             byte_size = (sizeof(*ctxt->node_seq.buffer) *
                   2000:                        (2 * ctxt->node_seq.maximum));
                   2001: 
                   2002:             if (ctxt->node_seq.buffer == NULL)
                   2003:                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
                   2004:             else
                   2005:                 tmp_buffer =
                   2006:                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
                   2007:                                                      byte_size);
                   2008: 
                   2009:             if (tmp_buffer == NULL) {
                   2010:                xmlErrMemory(ctxt, "failed to allocate buffer\n");
                   2011:                 return;
                   2012:             }
                   2013:             ctxt->node_seq.buffer = tmp_buffer;
                   2014:             ctxt->node_seq.maximum *= 2;
                   2015:         }
                   2016: 
                   2017:         /* If position is not at end, move elements out of the way */
                   2018:         if (pos != ctxt->node_seq.length) {
                   2019:             unsigned long i;
                   2020: 
                   2021:             for (i = ctxt->node_seq.length; i > pos; i--)
                   2022:                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
                   2023:         }
                   2024: 
                   2025:         /* Copy element and increase length */
                   2026:         ctxt->node_seq.buffer[pos] = *info;
                   2027:         ctxt->node_seq.length++;
                   2028:     }
                   2029: }
                   2030: 
                   2031: /************************************************************************
                   2032:  *                                                                     *
                   2033:  *             Default settings                                        *
                   2034:  *                                                                     *
                   2035:  ************************************************************************/
                   2036: /**
                   2037:  * xmlPedanticParserDefault:
1.1.1.3 ! misho    2038:  * @val:  int 0 or 1
1.1       misho    2039:  *
                   2040:  * Set and return the previous value for enabling pedantic warnings.
                   2041:  *
                   2042:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2043:  */
                   2044: 
                   2045: int
                   2046: xmlPedanticParserDefault(int val) {
                   2047:     int old = xmlPedanticParserDefaultValue;
                   2048: 
                   2049:     xmlPedanticParserDefaultValue = val;
                   2050:     return(old);
                   2051: }
                   2052: 
                   2053: /**
                   2054:  * xmlLineNumbersDefault:
1.1.1.3 ! misho    2055:  * @val:  int 0 or 1
1.1       misho    2056:  *
                   2057:  * Set and return the previous value for enabling line numbers in elements
                   2058:  * contents. This may break on old application and is turned off by default.
                   2059:  *
                   2060:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2061:  */
                   2062: 
                   2063: int
                   2064: xmlLineNumbersDefault(int val) {
                   2065:     int old = xmlLineNumbersDefaultValue;
                   2066: 
                   2067:     xmlLineNumbersDefaultValue = val;
                   2068:     return(old);
                   2069: }
                   2070: 
                   2071: /**
                   2072:  * xmlSubstituteEntitiesDefault:
1.1.1.3 ! misho    2073:  * @val:  int 0 or 1
1.1       misho    2074:  *
                   2075:  * Set and return the previous value for default entity support.
                   2076:  * Initially the parser always keep entity references instead of substituting
                   2077:  * entity values in the output. This function has to be used to change the
                   2078:  * default parser behavior
                   2079:  * SAX::substituteEntities() has to be used for changing that on a file by
                   2080:  * file basis.
                   2081:  *
                   2082:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2083:  */
                   2084: 
                   2085: int
                   2086: xmlSubstituteEntitiesDefault(int val) {
                   2087:     int old = xmlSubstituteEntitiesDefaultValue;
                   2088: 
                   2089:     xmlSubstituteEntitiesDefaultValue = val;
                   2090:     return(old);
                   2091: }
                   2092: 
                   2093: /**
                   2094:  * xmlKeepBlanksDefault:
1.1.1.3 ! misho    2095:  * @val:  int 0 or 1
1.1       misho    2096:  *
                   2097:  * Set and return the previous value for default blanks text nodes support.
                   2098:  * The 1.x version of the parser used an heuristic to try to detect
                   2099:  * ignorable white spaces. As a result the SAX callback was generating
                   2100:  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
                   2101:  * using the DOM output text nodes containing those blanks were not generated.
                   2102:  * The 2.x and later version will switch to the XML standard way and
                   2103:  * ignorableWhitespace() are only generated when running the parser in
                   2104:  * validating mode and when the current element doesn't allow CDATA or
                   2105:  * mixed content.
1.1.1.3 ! misho    2106:  * This function is provided as a way to force the standard behavior
1.1       misho    2107:  * on 1.X libs and to switch back to the old mode for compatibility when
                   2108:  * running 1.X client code on 2.X . Upgrade of 1.X code should be done
                   2109:  * by using xmlIsBlankNode() commodity function to detect the "empty"
                   2110:  * nodes generated.
                   2111:  * This value also affect autogeneration of indentation when saving code
                   2112:  * if blanks sections are kept, indentation is not generated.
                   2113:  *
                   2114:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2115:  */
                   2116: 
                   2117: int
                   2118: xmlKeepBlanksDefault(int val) {
                   2119:     int old = xmlKeepBlanksDefaultValue;
                   2120: 
                   2121:     xmlKeepBlanksDefaultValue = val;
                   2122:     if (!val) xmlIndentTreeOutput = 1;
                   2123:     return(old);
                   2124: }
                   2125: 
                   2126: #define bottom_parserInternals
                   2127: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>