embedaddon/libxml2/parserInternals.c - annotate

Return to parserInternals.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2
Annotation of embedaddon/libxml2/parserInternals.c, revision 1.1.1.2

1.1       misho       1: /*
                      2:  * parserInternals.c : Internal routines (and obsolete ones) needed for the
                      3:  *                     XML and HTML parsers.
                      4:  *
                      5:  * See Copyright for the status of this software.
                      6:  *
                      7:  * daniel@veillard.com
                      8:  */
                      9: 
                     10: #define IN_LIBXML
                     11: #include "libxml.h"
                     12: 
                     13: #if defined(WIN32) && !defined (__CYGWIN__)
                     14: #define XML_DIR_SEP '\\' /* WIN32 defined and not Cygwin: backslash */
                     15: #else
                     16: #define XML_DIR_SEP '/' /* every other build (Cygwin included): forward slash */
                     17: #endif
                     18: 
                     19: #include <string.h>
                     20: #ifdef HAVE_CTYPE_H
                     21: #include <ctype.h>
                     22: #endif
                     23: #ifdef HAVE_STDLIB_H
                     24: #include <stdlib.h>
                     25: #endif
                     26: #ifdef HAVE_SYS_STAT_H
                     27: #include <sys/stat.h>
                     28: #endif
                     29: #ifdef HAVE_FCNTL_H
                     30: #include <fcntl.h>
                     31: #endif
                     32: #ifdef HAVE_UNISTD_H
                     33: #include <unistd.h>
                     34: #endif
                     35: #ifdef HAVE_ZLIB_H
                     36: #include <zlib.h>
                     37: #endif
                     38: 
                     39: #include <libxml/xmlmemory.h>
                     40: #include <libxml/tree.h>
                     41: #include <libxml/parser.h>
                     42: #include <libxml/parserInternals.h>
                     43: #include <libxml/valid.h>
                     44: #include <libxml/entities.h>
                     45: #include <libxml/xmlerror.h>
                     46: #include <libxml/encoding.h>
                     47: #include <libxml/valid.h>
                     48: #include <libxml/xmlIO.h>
                     49: #include <libxml/uri.h>
                     50: #include <libxml/dict.h>
                     51: #include <libxml/SAX.h>
                     52: #ifdef LIBXML_CATALOG_ENABLED
                     53: #include <libxml/catalog.h>
                     54: #endif
                     55: #include <libxml/globals.h>
                     56: #include <libxml/chvalid.h>
                     57: 
                     58: /*
                     59:  * Various global defaults for parsing
                     60:  */
                     61: 
                     62: /**
                     63:  * xmlCheckVersion:
                     64:  * @version: the version number the calling program was compiled with
                     65:  *
                     66:  * Compare @version against LIBXML_VERSION of this build of the library.
                     67:  * The parser is initialized with xmlInitParser() before the comparison.
                          *
                          * - If (version / 10000) differs, a "Fatal" message is sent through
                          *   xmlGenericError() and also written to stderr.
                          * - If the library's (version / 100) is lower than the caller's, a
                          *   "Warning" message is sent through xmlGenericError().
                          *
                          * NOTE(review): the code below only reports the mismatch; it contains
                          * no exit()/abort() call of its own, so the "Fatal" case does not
                          * terminate the application from here.
                     68:  */
                     69: void
                     70: xmlCheckVersion(int version) {
                     71:     int myversion = (int) LIBXML_VERSION;
                     72: 
                     73:     xmlInitParser();
                     74: 
                     75:     if ((myversion / 10000) != (version / 10000)) { /* leading version component differs */
                     76:        xmlGenericError(xmlGenericErrorContext, 
                     77:                "Fatal: program compiled against libxml %d using libxml %d\n",
                     78:                (version / 10000), (myversion / 10000));
                     79:        fprintf(stderr, 
                     80:                "Fatal: program compiled against libxml %d using libxml %d\n",
                     81:                (version / 10000), (myversion / 10000));
                     82:     }
                     83:     if ((myversion / 100) < (version / 100)) { /* library older than what the caller expects */
                     84:        xmlGenericError(xmlGenericErrorContext, 
                     85:                "Warning: program compiled against libxml %d using older %d\n",
                     86:                (version / 100), (myversion / 100));
                     87:     }
                     88: }
                     89: 
                     90: 
                     91: /************************************************************************
                     92:  *                                                                     *
                     93:  *             Some factorized error routines                          *
                     94:  *                                                                     *
                     95:  ************************************************************************/
                     96: 
                     97: 
                     98: /**
                     99:  * xmlErrMemory:
                    100:  * @ctxt:  an XML parser context, may be NULL
                    101:  * @extra:  extra information appended to the message, may be NULL
                    102:  *
                    103:  * Handle a memory allocation error.
                          *
                          * When @ctxt is given, its errNo is set to XML_ERR_NO_MEMORY, its
                          * instate is set to XML_PARSER_EOF and disableSAX is set to 1. A fatal
                          * XML_ERR_NO_MEMORY error is then raised through __xmlRaiseError(),
                          * with @extra included in the message when it is not NULL.
                          * Nothing is done if @ctxt is already in that state (disableSAX set
                          * and instate == XML_PARSER_EOF).
                    104:  */
                    105: void
                    106: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
                    107: {
                    108:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    109:         (ctxt->instate == XML_PARSER_EOF))
                    110:        return; /* context already stopped: do not report again */
                    111:     if (ctxt != NULL) {
                    112:         ctxt->errNo = XML_ERR_NO_MEMORY;
                    113:         ctxt->instate = XML_PARSER_EOF; /* together with disableSAX, the state tested by the guard above */
                    114:         ctxt->disableSAX = 1;
                    115:     }
                    116:     if (extra)
                    117:         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                    118:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
                    119:                         NULL, NULL, 0, 0,
                    120:                         "Memory allocation failed : %s\n", extra);
                    121:     else
                    122:         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                    123:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
                    124:                         NULL, NULL, 0, 0, "Memory allocation failed\n");
                    125: }
                    126: 
                    127: /**
                    128:  * __xmlErrEncoding:
                    129:  * @ctxt:  an XML parser context, may be NULL
                    130:  * @xmlerr:  the error number
                    131:  * @msg:  the error message; passed to __xmlRaiseError() as the format
                          *        string, with @str1 and @str2 as its arguments
                    132:  * @str1:  a string info, may be NULL
                    133:  * @str2:  a string info, may be NULL
                    134:  *
                    135:  * Handle an encoding error: record @xmlerr in the context, raise it as a
                          * fatal parser error, then mark the document as not well formed and,
                          * unless the context is in recovery mode, disable SAX callbacks.
                          * Nothing is done if the context is already stopped (disableSAX set
                          * and instate == XML_PARSER_EOF).
                    136:  */
                    137: void
                    138: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
                    139:                  const char *msg, const xmlChar * str1, const xmlChar * str2)
                    140: {
                    141:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    142:         (ctxt->instate == XML_PARSER_EOF))
                    143:        return; /* context already stopped: do not report again */
                    144:     if (ctxt != NULL)
                    145:         ctxt->errNo = xmlerr;
                    146:     __xmlRaiseError(NULL, NULL, NULL,
                    147:                     ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
                    148:                     NULL, 0, (const char *) str1, (const char *) str2,
                    149:                     NULL, 0, 0, msg, str1, str2);
                    150:     if (ctxt != NULL) {
                    151:         ctxt->wellFormed = 0;
                    152:         if (ctxt->recovery == 0)
                    153:             ctxt->disableSAX = 1; /* not recovering: stop delivering SAX events */
                    154:     }
                    155: }
                    156: 
                    157: /**
                    158:  * xmlErrInternal:
                    159:  * @ctxt:  an XML parser context, may be NULL
                    160:  * @msg:  the error message; passed to __xmlRaiseError() as the format
                          *        string, with @str as its argument
                    161:  * @str:  error information
                    162:  *
                    163:  * Handle an internal error: record XML_ERR_INTERNAL_ERROR in the context,
                          * raise it as a fatal parser error, then mark the document as not well
                          * formed and, unless the context is in recovery mode, disable SAX
                          * callbacks. Nothing is done if the context is already stopped
                          * (disableSAX set and instate == XML_PARSER_EOF).
                    164:  */
                    165: static void
                    166: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
                    167: {
                    168:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    169:         (ctxt->instate == XML_PARSER_EOF))
                    170:        return; /* context already stopped: do not report again */
                    171:     if (ctxt != NULL)
                    172:         ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    173:     __xmlRaiseError(NULL, NULL, NULL,
                    174:                     ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
                    175:                     XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
                    176:                     0, 0, msg, str);
                    177:     if (ctxt != NULL) {
                    178:         ctxt->wellFormed = 0;
                    179:         if (ctxt->recovery == 0)
                    180:             ctxt->disableSAX = 1; /* not recovering: stop delivering SAX events */
                    181:     }
                    182: }
                    183: 
                    184: /**
                    185:  * xmlErrEncodingInt:
                    186:  * @ctxt:  an XML parser context, may be NULL
                    187:  * @error:  the error number
                    188:  * @msg:  the error message; passed to __xmlRaiseError() as the format
                          *        string, with @val as its argument
                    189:  * @val:  an integer value
                    190:  *
                    191:  * Handle an encoding error that carries an integer value: record @error
                          * in the context, raise it as a fatal parser error, then mark the
                          * document as not well formed and, unless the context is in recovery
                          * mode, disable SAX callbacks. Nothing is done if the context is
                          * already stopped (disableSAX set and instate == XML_PARSER_EOF).
                    192:  */
                    193: static void
                    194: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                    195:                   const char *msg, int val)
                    196: {
                    197:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
                    198:         (ctxt->instate == XML_PARSER_EOF))
                    199:        return; /* context already stopped: do not report again */
                    200:     if (ctxt != NULL)
                    201:         ctxt->errNo = error;
                    202:     __xmlRaiseError(NULL, NULL, NULL,
                    203:                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    204:                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
                    205:     if (ctxt != NULL) {
                    206:         ctxt->wellFormed = 0;
                    207:         if (ctxt->recovery == 0)
                    208:             ctxt->disableSAX = 1; /* not recovering: stop delivering SAX events */
                    209:     }
                    210: }
                    211: 
                    212: /**
                    213:  * xmlIsLetter:
                    214:  * @c:  a Unicode character (int)
                    215:  *
                    216:  * Check whether the character is allowed by the production
                    217:  * [84] Letter ::= BaseChar | Ideographic
                          * The test is the logical OR of the IS_BASECHAR() and IS_IDEOGRAPHIC()
                          * checks.
                    218:  *
                    219:  * Returns 0 if not, non-zero otherwise
                    220:  */
                    221: int
                    222: xmlIsLetter(int c) {
                    223:     return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
                    224: }
                    225: 
                    226: /************************************************************************
                    227:  *                                                                     *
                    228:  *             Input handling functions for progressive parsing        *
                    229:  *                                                                     *
                    230:  ************************************************************************/
                    231: 
                    232: /* #define DEBUG_INPUT */
                    233: /* #define DEBUG_STACK */
                    234: /* #define DEBUG_PUSH */
                    235: 
                    236: 
                    237: /* we need to keep enough input to show errors in context:
                          * xmlParserInputShrink() leaves this many already consumed bytes in
                          * front of the current position when it discards used input */
                    238: #define LINE_LEN        80
                    239: 
                    240: #ifdef DEBUG_INPUT
                    241: #define CHECK_BUFFER(in) check_buffer(in)
                    242: 
                         /**
                          * check_buffer:
                          * @in:  the parser input to check
                          *
                          * Debug-only helper (compiled when DEBUG_INPUT is defined). Reports
                          * through xmlGenericError() when the input's base pointer no longer
                          * matches the buffer content, or when cur lies outside
                          * [base, base + use], then prints a one-line summary of the buffer.
                          *
                          * Pointers are printed with %p and the cur offset as a long: casting
                          * a pointer to int and printing it with %x truncates the value on
                          * platforms where pointers are wider than int, and a pointer
                          * difference is not an int either.
                          */
                    243: static
                    244: void check_buffer(xmlParserInputPtr in) {
                    245:     if (in->base != in->buf->buffer->content) {
                    246:         xmlGenericError(xmlGenericErrorContext,
                    247:                "xmlParserInput: base mismatch problem\n");
                    248:     }
                    249:     if (in->cur < in->base) {
                    250:         xmlGenericError(xmlGenericErrorContext,
                    251:                "xmlParserInput: cur < base problem\n");
                    252:     }
                    253:     if (in->cur > in->base + in->buf->buffer->use) {
                    254:         xmlGenericError(xmlGenericErrorContext,
                    255:                "xmlParserInput: cur > base + use problem\n");
                    256:     }
                    257:     xmlGenericError(xmlGenericErrorContext,"buffer %p : content %p, cur %ld, use %d, size %d\n",
                    258:             (void *) in, (void *) in->buf->buffer->content, (long) (in->cur - in->base),
                    259:            in->buf->buffer->use, in->buf->buffer->size);
                    260: }
                    261: 
                    262: #else
                    263: #define CHECK_BUFFER(in) 
                    264: #endif
                    265: 
                    266: 
                    267: /**
                    268:  * xmlParserInputRead:
                    269:  * @in:  an XML parser input
                    270:  * @len:  an indicative size for the lookahead
                    271:  *
                    272:  * This function refreshes the input for the parser. It doesn't try to
                    273:  * preserve pointers to the input buffer, and discards already read data:
                          * everything before the current position is removed from the buffer
                          * (and added to in->consumed) before xmlParserInputBufferRead() is
                          * called. base, cur and end are then re-synchronized with the buffer.
                    274:  *
                    275:  * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
                    276:  * end of this entity. -1 is also returned when @in, its buffer, its
                          * base/cur pointers or its read callback are missing.
                    277:  */
                    278: int
                    279: xmlParserInputRead(xmlParserInputPtr in, int len) {
                    280:     int ret;
                    281:     int used;
                    282:     int indx;
                    283: 
                    284:     if (in == NULL) return(-1);
                    285: #ifdef DEBUG_INPUT
                    286:     xmlGenericError(xmlGenericErrorContext, "Read\n");
                    287: #endif
                    288:     if (in->buf == NULL) return(-1);
                    289:     if (in->base == NULL) return(-1);
                    290:     if (in->cur == NULL) return(-1);
                    291:     if (in->buf->buffer == NULL) return(-1);
                    292:     if (in->buf->readcallback == NULL) return(-1);
                    293: 
                    294:     CHECK_BUFFER(in);
                    295: 
                    296:     used = in->cur - in->buf->buffer->content; /* bytes in front of the current position */
                    297:     ret = xmlBufferShrink(in->buf->buffer, used);
                    298:     if (ret > 0) { /* a positive result is used as the number of bytes dropped */
                    299:        in->cur -= ret;
                    300:        in->consumed += ret;
                    301:     }
                    302:     ret = xmlParserInputBufferRead(in->buf, len);
                    303:     if (in->base != in->buf->buffer->content) {
                    304:         /*
                    305:         * the buffer has been reallocated: keep cur at the same offset
                         * relative to the new content pointer
                    306:         */
                    307:        indx = in->cur - in->base;
                    308:        in->base = in->buf->buffer->content;
                    309:        in->cur = &in->buf->buffer->content[indx];
                    310:     }
                    311:     in->end = &in->buf->buffer->content[in->buf->buffer->use];
                    312: 
                    313:     CHECK_BUFFER(in);
                    314: 
                    315:     return(ret);
                    316: }
                    317: 
                    318: /**
                    319:  * xmlParserInputGrow:
                    320:  * @in:  an XML parser input
                    321:  * @len:  an indicative size for the lookahead
                    322:  *
                    323:  * This function increases the input for the parser. It tries to
                    324:  * preserve pointers to the input buffer, and keeps already read data.
                          * No read is attempted when more than INPUT_CHUNK bytes are already
                          * buffered beyond the current position, or when the input has no read
                          * callback; 0 is returned in both cases.
                    325:  *
                    326:  * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
                    327:  * end of this entity
                    328:  */
                    329: int
                    330: xmlParserInputGrow(xmlParserInputPtr in, int len) {
                    331:     int ret;
                    332:     int indx;
                    333: 
                    334:     if (in == NULL) return(-1);
                    335: #ifdef DEBUG_INPUT
                    336:     xmlGenericError(xmlGenericErrorContext, "Grow\n");
                    337: #endif
                    338:     if (in->buf == NULL) return(-1);
                    339:     if (in->base == NULL) return(-1);
                    340:     if (in->cur == NULL) return(-1);
                    341:     if (in->buf->buffer == NULL) return(-1);
                    342: 
                    343:     CHECK_BUFFER(in);
                    344: 
                    345:     indx = in->cur - in->base; /* offset of the current position in the buffer */
                    346:     if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { /* enough lookahead already buffered */
                    347: 
                    348:        CHECK_BUFFER(in);
                    349: 
                    350:         return(0);
                    351:     }
                    352:     if (in->buf->readcallback != NULL)
                    353:        ret = xmlParserInputBufferGrow(in->buf, len);
                    354:     else       
                    355:         return(0); /* no read callback: nothing more can be fetched */
                    356: 
                    357:     /*
                    358:      * NOTE : in->base may be a "dangling" i.e. freed pointer in this
                    359:      *        block, but we use it really as an integer to do some
                    360:      *        pointer arithmetic. Insure will raise it as a bug but in
                    361:      *        that specific case, that's not !
                    362:      */
                    363:     if (in->base != in->buf->buffer->content) {
                    364:         /*
                    365:         * the buffer has been reallocated: keep cur at the same offset
                         * relative to the new content pointer
                    366:         */
                    367:        indx = in->cur - in->base;
                    368:        in->base = in->buf->buffer->content;
                    369:        in->cur = &in->buf->buffer->content[indx];
                    370:     }
                    371:     in->end = &in->buf->buffer->content[in->buf->buffer->use];
                    372: 
                    373:     CHECK_BUFFER(in);
                    374: 
                    375:     return(ret);
                    376: }
                    377: 
                    378: /**
                    379:  * xmlParserInputShrink:
                    380:  * @in:  an XML parser input
                    381:  *
                    382:  * This function removes used input for the parser.
                          *
                          * When more than INPUT_CHUNK bytes lie in front of the current
                          * position, all of them except the last LINE_LEN are dropped from the
                          * buffer and added to in->consumed. If the buffer then holds no more
                          * than INPUT_CHUNK bytes, xmlParserInputBufferRead() is called to
                          * refill it and base, cur and end are re-synchronized with the buffer.
                          * Does nothing when @in, its buffer or its base/cur pointers are
                          * missing.
                    383:  */
                    384: void
                    385: xmlParserInputShrink(xmlParserInputPtr in) {
                    386:     int used;
                    387:     int ret;
                    388:     int indx;
                    389: 
                    390: #ifdef DEBUG_INPUT
                    391:     xmlGenericError(xmlGenericErrorContext, "Shrink\n");
                    392: #endif
                    393:     if (in == NULL) return;
                    394:     if (in->buf == NULL) return;
                    395:     if (in->base == NULL) return;
                    396:     if (in->cur == NULL) return;
                    397:     if (in->buf->buffer == NULL) return;
                    398: 
                    399:     CHECK_BUFFER(in);
                    400: 
                    401:     used = in->cur - in->buf->buffer->content; /* bytes in front of the current position */
                    402:     /*
                    403:      * Do not shrink on large buffers whose only a tiny fraction
                    404:      * was consumed
                    405:      */
                    406:     if (used > INPUT_CHUNK) {
                    407:        ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); /* keep LINE_LEN bytes of context */
                    408:        if (ret > 0) { /* a positive result is used as the number of bytes dropped */
                    409:            in->cur -= ret;
                    410:            in->consumed += ret;
                    411:        }
                    412:        in->end = &in->buf->buffer->content[in->buf->buffer->use];
                    413:     }
                    414: 
                    415:     CHECK_BUFFER(in);
                    416: 
                    417:     if (in->buf->buffer->use > INPUT_CHUNK) { /* still enough data buffered: no refill */
                    418:         return;
                    419:     }
                    420:     xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); /* result ignored; pointers are refreshed below either way */
                    421:     if (in->base != in->buf->buffer->content) {
                    422:         /*
                    423:         * the buffer has been reallocated: keep cur at the same offset
                         * relative to the new content pointer
                    424:         */
                    425:        indx = in->cur - in->base;
                    426:        in->base = in->buf->buffer->content;
                    427:        in->cur = &in->buf->buffer->content[indx];
                    428:     }
                    429:     in->end = &in->buf->buffer->content[in->buf->buffer->use];
                    430: 
                    431:     CHECK_BUFFER(in);
                    432: }
                    433: 
                    434: /************************************************************************
                    435:  *                                                                     *
                    436:  *             UTF8 character input and related functions              *
                    437:  *                                                                     *
                    438:  ************************************************************************/
                    439: 
                    440: /**
                    441:  * xmlNextChar:
                    442:  * @ctxt:  the XML parser context
                    443:  *
                    444:  * Skip to the next character of the input.
                          *
                          * Does nothing when @ctxt is NULL, has no input, or is in the
                          * XML_PARSER_EOF state.
                          *
                          * When ctxt->charset is XML_CHAR_ENCODING_UTF8 the lead byte decides
                          * how many bytes (1 to 4) are skipped; continuation bytes are checked
                          * and 3- and 4-byte values are range-checked. A malformed sequence is
                          * reported through __xmlErrEncoding(), the context charset is switched
                          * to XML_CHAR_ENCODING_8859_1 and a single byte is skipped.
                          * For any other charset exactly one byte is skipped.
                          *
                          * line/col and nbChars are updated. After a successful advance, a '%'
                          * at the new position is handed to xmlParserHandlePEReference() when
                          * not parsing HTML, and the input is popped when its end is reached
                          * and no more data can be read.
                    445:  */
                    446: 
                    447: void
                    448: xmlNextChar(xmlParserCtxtPtr ctxt)
                    449: {
                    450:     if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
                    451:         (ctxt->input == NULL))
                    452:         return;
                    453: 
                    454:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                    455:         if ((*ctxt->input->cur == 0) &&
                    456:             (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
                    457:             (ctxt->instate != XML_PARSER_COMMENT)) {
                    458:             /*
                    459:              * If we are at the end of the current entity and
                    460:              * the context allows it, we pop consumed entities
                    461:              * automatically.
                    462:              * the auto closing should be blocked in other cases
                    463:              */
                    464:             xmlPopInput(ctxt);
                    465:         } else {
                    466:             const unsigned char *cur;
                    467:             unsigned char c;
                    468: 
                    469:             /*
                    470:              *   2.11 End-of-Line Handling
                    471:              *   the literal two-character sequence "#xD#xA" or a standalone
                    472:              *   literal #xD, an XML processor must pass to the application
                    473:              *   the single character #xA.
                    474:              */
                    475:             if (*(ctxt->input->cur) == '\n') {
                    476:                 ctxt->input->line++; ctxt->input->col = 1;
                    477:             } else
                    478:                 ctxt->input->col++;
                    479: 
                    480:             /*
                    481:              * We are supposed to handle UTF8, check it's valid
                    482:              * From rfc2044: encoding of the Unicode values on UTF-8:
                    483:              *
                    484:              * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    485:              * 0000 0000-0000 007F   0xxxxxxx
                    486:              * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    487:              * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    488:              *
                    489:              * Check for the 0x110000 limit too
                              * (4-byte sequences, 11110xxx followed by three 10xxxxxx
                              * bytes, are handled below as well)
                    490:              */
                    491:             cur = ctxt->input->cur;
                    492: 
                    493:             c = *cur;
                    494:             if (c & 0x80) {
                                 /*
                                  * A lead byte of the form 10xxxxxx (bit 0x40 clear) is a
                                  * continuation byte and can never start a sequence; without
                                  * this test it would be skipped as a 2-byte character
                                  * whenever the following byte is also 10xxxxxx.
                                  */
                    495:                if (((c & 0x40) == 0) || (c == 0xC0))
                    496:                    goto encoding_error;
                    497:                 if (cur[1] == 0) {
                    498:                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    499:                     cur = ctxt->input->cur; /* the buffer may have moved */
                    500:                 }
                    501:                 if ((cur[1] & 0xc0) != 0x80)
                    502:                     goto encoding_error;
                    503:                 if ((c & 0xe0) == 0xe0) {
                    504:                     unsigned int val;
                    505: 
                    506:                     if (cur[2] == 0) {
                    507:                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    508:                         cur = ctxt->input->cur;
                    509:                     }
                    510:                     if ((cur[2] & 0xc0) != 0x80)
                    511:                         goto encoding_error;
                    512:                     if ((c & 0xf0) == 0xf0) {
                    513:                         if (cur[3] == 0) {
                    514:                             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    515:                             cur = ctxt->input->cur;
                    516:                         }
                    517:                         if (((c & 0xf8) != 0xf0) ||
                    518:                             ((cur[3] & 0xc0) != 0x80))
                    519:                             goto encoding_error;
                    520:                         /* 4-byte code */
                    521:                         ctxt->input->cur += 4;
                    522:                         val = (cur[0] & 0x7) << 18;
                    523:                         val |= (cur[1] & 0x3f) << 12;
                    524:                         val |= (cur[2] & 0x3f) << 6;
                    525:                         val |= cur[3] & 0x3f;
                    526:                     } else {
                    527:                         /* 3-byte code */
                    528:                         ctxt->input->cur += 3;
                    529:                         val = (cur[0] & 0xf) << 12;
                    530:                         val |= (cur[1] & 0x3f) << 6;
                    531:                         val |= cur[2] & 0x3f;
                    532:                     }
                                     /* surrogates, 0xFFFE/0xFFFF and values >= 0x110000 are
                                      * reported, but the bytes have already been skipped */
                    533:                     if (((val > 0xd7ff) && (val < 0xe000)) ||
                    534:                         ((val > 0xfffd) && (val < 0x10000)) ||
                    535:                         (val >= 0x110000)) {
                    536:                        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    537:                                          "Char 0x%X out of allowed range\n",
                    538:                                          val);
                    539:                     }
                    540:                 } else
                    541:                     /* 2-byte code */
                    542:                     ctxt->input->cur += 2;
                    543:             } else
                    544:                 /* 1-byte code */
                    545:                 ctxt->input->cur++;
                    546: 
                    547:             ctxt->nbChars++;
                    548:             if (*ctxt->input->cur == 0)
                    549:                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    550:         }
                    551:     } else {
                    552:         /*
                    553:          * Assume it's a fixed length encoding (1) with
                    554:          * a compatible encoding for the ASCII set, since
                    555:          * XML constructs only use < 128 chars
                    556:          */
                    557: 
                    558:         if (*(ctxt->input->cur) == '\n') {
                    559:             ctxt->input->line++; ctxt->input->col = 1;
                    560:         } else
                    561:             ctxt->input->col++;
                    562:         ctxt->input->cur++;
                    563:         ctxt->nbChars++;
                    564:         if (*ctxt->input->cur == 0)
                    565:             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    566:     }
                    567:     if ((*ctxt->input->cur == '%') && (!ctxt->html))
                    568:         xmlParserHandlePEReference(ctxt);
                    569:     if ((*ctxt->input->cur == 0) &&
                    570:         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
                    571:         xmlPopInput(ctxt);
                    572:     return;
                    573: encoding_error:
                    574:     /*
                    575:      * If we detect an UTF8 error that probably mean that the
                    576:      * input encoding didn't get properly advertised in the
                    577:      * declaration header. Report the error and switch the encoding
                    578:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    579:      * encoding !)
                    580:      */
                         /* fewer than 4 bytes left: report without the byte dump so that
                          * nothing past the end of the input is read */
                    581:     if ((ctxt == NULL) || (ctxt->input == NULL) ||
                    582:         (ctxt->input->end - ctxt->input->cur < 4)) {
                    583:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    584:                     "Input is not proper UTF-8, indicate encoding !\n",
                    585:                     NULL, NULL);
                    586:     } else {
                    587:         char buffer[150];
                    588: 
                    589:        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    590:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    591:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    592:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    593:                     "Input is not proper UTF-8, indicate encoding !\n%s",
                    594:                     BAD_CAST buffer, NULL);
                    595:     }
                    596:     ctxt->charset = XML_CHAR_ENCODING_8859_1;
                    597:     ctxt->input->cur++; /* skip only the offending byte */
                    598:     return;
                    599: }
                    600: 
                    601: /**
                    602:  * xmlCurrentChar:
                    603:  * @ctxt:  the XML parser context
                    604:  * @len:  pointer to the length of the char read
                    605:  *
                    606:  * The current char value, if using UTF-8 this may actually span multiple
                    607:  * bytes in the input buffer. Implement the end of line normalization:
                    608:  * 2.11 End-of-Line Handling
                    609:  * Wherever an external parsed entity or the literal entity value
                    610:  * of an internal parsed entity contains either the literal two-character
                    611:  * sequence "#xD#xA" or a standalone literal #xD, an XML processor
                    612:  * must pass to the application the single character #xA.
                    613:  * This behavior can conveniently be produced by normalizing all
                    614:  * line breaks to #xA on input, before parsing.)
                    615:  *
                    616:  * Returns the current char value and its length
                    617:  */
                    618: 
                    619: int
                    620: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                    621:     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
                    622:     if (ctxt->instate == XML_PARSER_EOF)
                    623:        return(0);
                    624: 
                    625:     if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
                    626:            *len = 1;
                    627:            return((int) *ctxt->input->cur);
                    628:     }
                    629:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                    630:        /*
                    631:         * We are supposed to handle UTF8, check it's valid
                    632:         * From rfc2044: encoding of the Unicode values on UTF-8:
                    633:         *
                    634:         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    635:         * 0000 0000-0000 007F   0xxxxxxx
                    636:         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    637:         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    638:         *
                    639:         * Check for the 0x110000 limit too
                    640:         */
                    641:        const unsigned char *cur = ctxt->input->cur;
                    642:        unsigned char c;
                    643:        unsigned int val;
                    644: 
                    645:        c = *cur;
                    646:        if (c & 0x80) {
                    647:            if (((c & 0x40) == 0) || (c == 0xC0))
                    648:                goto encoding_error;
                    649:            if (cur[1] == 0) {
                    650:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    651:                 cur = ctxt->input->cur;
                    652:             }
                    653:            if ((cur[1] & 0xc0) != 0x80)
                    654:                goto encoding_error;
                    655:            if ((c & 0xe0) == 0xe0) {
                    656:                if (cur[2] == 0) {
                    657:                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    658:                     cur = ctxt->input->cur;
                    659:                 }
                    660:                if ((cur[2] & 0xc0) != 0x80)
                    661:                    goto encoding_error;
                    662:                if ((c & 0xf0) == 0xf0) {
                    663:                    if (cur[3] == 0) {
                    664:                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    665:                         cur = ctxt->input->cur;
                    666:                     }
                    667:                    if (((c & 0xf8) != 0xf0) ||
                    668:                        ((cur[3] & 0xc0) != 0x80))
                    669:                        goto encoding_error;
                    670:                    /* 4-byte code */
                    671:                    *len = 4;
                    672:                    val = (cur[0] & 0x7) << 18;
                    673:                    val |= (cur[1] & 0x3f) << 12;
                    674:                    val |= (cur[2] & 0x3f) << 6;
                    675:                    val |= cur[3] & 0x3f;
                    676:                    if (val < 0x10000)
                    677:                        goto encoding_error;
                    678:                } else {
                    679:                  /* 3-byte code */
                    680:                    *len = 3;
                    681:                    val = (cur[0] & 0xf) << 12;
                    682:                    val |= (cur[1] & 0x3f) << 6;
                    683:                    val |= cur[2] & 0x3f;
                    684:                    if (val < 0x800)
                    685:                        goto encoding_error;
                    686:                }
                    687:            } else {
                    688:              /* 2-byte code */
                    689:                *len = 2;
                    690:                val = (cur[0] & 0x1f) << 6;
                    691:                val |= cur[1] & 0x3f;
                    692:                if (val < 0x80)
                    693:                    goto encoding_error;
                    694:            }
                    695:            if (!IS_CHAR(val)) {
                    696:                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    697:                                  "Char 0x%X out of allowed range\n", val);
                    698:            }    
                    699:            return(val);
                    700:        } else {
                    701:            /* 1-byte code */
                    702:            *len = 1;
                    703:            if (*ctxt->input->cur == 0)
                    704:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    705:            if ((*ctxt->input->cur == 0) &&
                    706:                (ctxt->input->end > ctxt->input->cur)) {
                    707:                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    708:                                  "Char 0x0 out of allowed range\n", 0);
                    709:            }
                    710:            if (*ctxt->input->cur == 0xD) {
                    711:                if (ctxt->input->cur[1] == 0xA) {
                    712:                    ctxt->nbChars++;
                    713:                    ctxt->input->cur++;
                    714:                }
                    715:                return(0xA);
                    716:            }
                    717:            return((int) *ctxt->input->cur);
                    718:        }
                    719:     }
                    720:     /*
                    721:      * Assume it's a fixed length encoding (1) with
                    722:      * a compatible encoding for the ASCII set, since
                    723:      * XML constructs only use < 128 chars
                    724:      */
                    725:     *len = 1;
                    726:     if (*ctxt->input->cur == 0xD) {
                    727:        if (ctxt->input->cur[1] == 0xA) {
                    728:            ctxt->nbChars++;
                    729:            ctxt->input->cur++;
                    730:        }
                    731:        return(0xA);
                    732:     }
                    733:     return((int) *ctxt->input->cur);
                    734: encoding_error:
                    735:     /*
                    736:      * An encoding problem may arise from a truncated input buffer
                    737:      * splitting a character in the middle. In that case do not raise
                    738:      * an error but return 0 to endicate an end of stream problem
                    739:      */
                    740:     if (ctxt->input->end - ctxt->input->cur < 4) {
                    741:        *len = 0;
                    742:        return(0);
                    743:     }
                    744: 
                    745:     /*
                    746:      * If we detect an UTF8 error that probably mean that the
                    747:      * input encoding didn't get properly advertised in the
                    748:      * declaration header. Report the error and switch the encoding
                    749:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    750:      * encoding !)
                    751:      */
                    752:     {
                    753:         char buffer[150];
                    754: 
                    755:        snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    756:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    757:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    758:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    759:                     "Input is not proper UTF-8, indicate encoding !\n%s",
                    760:                     BAD_CAST buffer, NULL);
                    761:     }
                    762:     ctxt->charset = XML_CHAR_ENCODING_8859_1; 
                    763:     *len = 1;
                    764:     return((int) *ctxt->input->cur);
                    765: }
                    766: 
                    767: /**
                    768:  * xmlStringCurrentChar:
                    769:  * @ctxt:  the XML parser context
                    770:  * @cur:  pointer to the beginning of the char
                    771:  * @len:  pointer to the length of the char read
                    772:  *
                    773:  * The current char value, if using UTF-8 this may actually span multiple
                    774:  * bytes in the input buffer.
                    775:  *
                    776:  * Returns the current char value and its length
                    777:  */
                    778: 
                    779: int
                    780: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
                    781: {
                    782:     if ((len == NULL) || (cur == NULL)) return(0);
                    783:     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
                    784:         /*
                    785:          * We are supposed to handle UTF8, check it's valid
                    786:          * From rfc2044: encoding of the Unicode values on UTF-8:
                    787:          *
                    788:          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    789:          * 0000 0000-0000 007F   0xxxxxxx
                    790:          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    791:          * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    792:          *
                    793:          * Check for the 0x110000 limit too
                    794:          */
                    795:         unsigned char c;
                    796:         unsigned int val;
                    797: 
                    798:         c = *cur;
                    799:         if (c & 0x80) {
                    800:             if ((cur[1] & 0xc0) != 0x80)
                    801:                 goto encoding_error;
                    802:             if ((c & 0xe0) == 0xe0) {
                    803: 
                    804:                 if ((cur[2] & 0xc0) != 0x80)
                    805:                     goto encoding_error;
                    806:                 if ((c & 0xf0) == 0xf0) {
                    807:                     if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
                    808:                         goto encoding_error;
                    809:                     /* 4-byte code */
                    810:                     *len = 4;
                    811:                     val = (cur[0] & 0x7) << 18;
                    812:                     val |= (cur[1] & 0x3f) << 12;
                    813:                     val |= (cur[2] & 0x3f) << 6;
                    814:                     val |= cur[3] & 0x3f;
                    815:                 } else {
                    816:                     /* 3-byte code */
                    817:                     *len = 3;
                    818:                     val = (cur[0] & 0xf) << 12;
                    819:                     val |= (cur[1] & 0x3f) << 6;
                    820:                     val |= cur[2] & 0x3f;
                    821:                 }
                    822:             } else {
                    823:                 /* 2-byte code */
                    824:                 *len = 2;
                    825:                 val = (cur[0] & 0x1f) << 6;
                    826:                 val |= cur[1] & 0x3f;
                    827:             }
                    828:             if (!IS_CHAR(val)) {
                    829:                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                    830:                                  "Char 0x%X out of allowed range\n", val);
                    831:             }
                    832:             return (val);
                    833:         } else {
                    834:             /* 1-byte code */
                    835:             *len = 1;
                    836:             return ((int) *cur);
                    837:         }
                    838:     }
                    839:     /*
                    840:      * Assume it's a fixed length encoding (1) with
                    841:      * a compatible encoding for the ASCII set, since
                    842:      * XML constructs only use < 128 chars
                    843:      */
                    844:     *len = 1;
                    845:     return ((int) *cur);
                    846: encoding_error:
                    847: 
                    848:     /*
                    849:      * An encoding problem may arise from a truncated input buffer
                    850:      * splitting a character in the middle. In that case do not raise
                    851:      * an error but return 0 to endicate an end of stream problem
                    852:      */
                    853:     if ((ctxt == NULL) || (ctxt->input == NULL) ||
                    854:         (ctxt->input->end - ctxt->input->cur < 4)) {
                    855:        *len = 0;
                    856:        return(0);
                    857:     }
                    858:     /*
                    859:      * If we detect an UTF8 error that probably mean that the
                    860:      * input encoding didn't get properly advertised in the
                    861:      * declaration header. Report the error and switch the encoding
                    862:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    863:      * encoding !)
                    864:      */
                    865:     {
                    866:         char buffer[150];
                    867: 
                    868:        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    869:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    870:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    871:        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                    872:                     "Input is not proper UTF-8, indicate encoding !\n%s",
                    873:                     BAD_CAST buffer, NULL);
                    874:     }
                    875:     *len = 1;
                    876:     return ((int) *cur);
                    877: }
                    878: 
                    879: /**
                    880:  * xmlCopyCharMultiByte:
                    881:  * @out:  pointer to an array of xmlChar
                    882:  * @val:  the char value
                    883:  *
                    884:  * append the char value in the array 
                    885:  *
                    886:  * Returns the number of xmlChar written
                    887:  */
                    888: int
                    889: xmlCopyCharMultiByte(xmlChar *out, int val) {
                    890:     if (out == NULL) return(0);
                    891:     /*
                    892:      * We are supposed to handle UTF8, check it's valid
                    893:      * From rfc2044: encoding of the Unicode values on UTF-8:
                    894:      *
                    895:      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    896:      * 0000 0000-0000 007F   0xxxxxxx
                    897:      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    898:      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    899:      */
                    900:     if  (val >= 0x80) {
                    901:        xmlChar *savedout = out;
                    902:        int bits;
                    903:        if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
                    904:        else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
                    905:        else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
                    906:        else {
                    907:            xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
                    908:                    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
                    909:                              val);
                    910:            return(0);
                    911:        }
                    912:        for ( ; bits >= 0; bits-= 6)
                    913:            *out++= ((val >> bits) & 0x3F) | 0x80 ;
                    914:        return (out - savedout);
                    915:     }
                    916:     *out = (xmlChar) val;
                    917:     return 1;
                    918: }
                    919: 
                    920: /**
                    921:  * xmlCopyChar:
                    922:  * @len:  Ignored, compatibility
                    923:  * @out:  pointer to an array of xmlChar
                    924:  * @val:  the char value
                    925:  *
                    926:  * append the char value in the array 
                    927:  *
                    928:  * Returns the number of xmlChar written
                    929:  */
                    930: 
                    931: int
                    932: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
                    933:     if (out == NULL) return(0);
                    934:     /* the len parameter is ignored */
                    935:     if  (val >= 0x80) {
                    936:        return(xmlCopyCharMultiByte (out, val));
                    937:     }
                    938:     *out = (xmlChar) val;
                    939:     return 1;
                    940: }
                    941: 
                    942: /************************************************************************
                    943:  *                                                                     *
                    944:  *             Commodity functions to switch encodings                 *
                    945:  *                                                                     *
                    946:  ************************************************************************/
                    947: 
                    948: /* defined in encoding.c, not public */
                    949: int
                    950: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                    951:                        xmlBufferPtr in, int len);
                    952: 
                    953: static int
                    954: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                    955:                        xmlCharEncodingHandlerPtr handler, int len);
                    956: static int
                    957: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                    958:                           xmlCharEncodingHandlerPtr handler, int len);
                    959: /**
                    960:  * xmlSwitchEncoding:
                    961:  * @ctxt:  the parser context
                    962:  * @enc:  the encoding value (number)
                    963:  *
                    964:  * change the input functions when discovering the character encoding
                    965:  * of a given entity.
                    966:  *
                    967:  * Returns 0 in case of success, -1 otherwise
                    968:  */
                    969: int
                    970: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
                    971: {
                    972:     xmlCharEncodingHandlerPtr handler;
                    973:     int len = -1;
                    974: 
                    975:     if (ctxt == NULL) return(-1);
                    976:     switch (enc) {
                    977:        case XML_CHAR_ENCODING_ERROR:
                    978:            __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
                    979:                           "encoding unknown\n", NULL, NULL);
                    980:            return(-1);
                    981:        case XML_CHAR_ENCODING_NONE:
                    982:            /* let's assume it's UTF-8 without the XML decl */
                    983:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    984:            return(0);
                    985:        case XML_CHAR_ENCODING_UTF8:
                    986:            /* default encoding, no conversion should be needed */
                    987:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    988: 
                    989:            /*
                    990:             * Errata on XML-1.0 June 20 2001
                    991:             * Specific handling of the Byte Order Mark for
                    992:             * UTF-8
                    993:             */
                    994:            if ((ctxt->input != NULL) &&
                    995:                (ctxt->input->cur[0] == 0xEF) &&
                    996:                (ctxt->input->cur[1] == 0xBB) &&
                    997:                (ctxt->input->cur[2] == 0xBF)) {
                    998:                ctxt->input->cur += 3;
                    999:            }
                   1000:            return(0);
                   1001:     case XML_CHAR_ENCODING_UTF16LE:
                   1002:     case XML_CHAR_ENCODING_UTF16BE:
                   1003:         /*The raw input characters are encoded
                   1004:          *in UTF-16. As we expect this function
                   1005:          *to be called after xmlCharEncInFunc, we expect
                   1006:          *ctxt->input->cur to contain UTF-8 encoded characters.
                   1007:          *So the raw UTF16 Byte Order Mark
                   1008:          *has also been converted into
                   1009:          *an UTF-8 BOM. Let's skip that BOM.
                   1010:          */
                   1011:         if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
                   1012:             (ctxt->input->cur[0] == 0xEF) &&
                   1013:             (ctxt->input->cur[1] == 0xBB) &&
                   1014:             (ctxt->input->cur[2] == 0xBF)) {
                   1015:             ctxt->input->cur += 3;
                   1016:         }
                   1017:         len = 90;
                   1018:        break;
                   1019:     case XML_CHAR_ENCODING_UCS2:
                   1020:         len = 90;
                   1021:        break;
                   1022:     case XML_CHAR_ENCODING_UCS4BE:
                   1023:     case XML_CHAR_ENCODING_UCS4LE:
                   1024:     case XML_CHAR_ENCODING_UCS4_2143:
                   1025:     case XML_CHAR_ENCODING_UCS4_3412:
                   1026:         len = 180;
                   1027:        break;
                   1028:     case XML_CHAR_ENCODING_EBCDIC:
                   1029:     case XML_CHAR_ENCODING_8859_1:
                   1030:     case XML_CHAR_ENCODING_8859_2:
                   1031:     case XML_CHAR_ENCODING_8859_3:
                   1032:     case XML_CHAR_ENCODING_8859_4:
                   1033:     case XML_CHAR_ENCODING_8859_5:
                   1034:     case XML_CHAR_ENCODING_8859_6:
                   1035:     case XML_CHAR_ENCODING_8859_7:
                   1036:     case XML_CHAR_ENCODING_8859_8:
                   1037:     case XML_CHAR_ENCODING_8859_9:
                   1038:     case XML_CHAR_ENCODING_ASCII:
                   1039:     case XML_CHAR_ENCODING_2022_JP:
                   1040:     case XML_CHAR_ENCODING_SHIFT_JIS:
                   1041:     case XML_CHAR_ENCODING_EUC_JP:
                   1042:         len = 45;
                   1043:        break;
                   1044:     }
                   1045:     handler = xmlGetCharEncodingHandler(enc);
                   1046:     if (handler == NULL) {
                   1047:        /*
                   1048:         * Default handlers.
                   1049:         */
                   1050:        switch (enc) {
                   1051:            case XML_CHAR_ENCODING_ASCII:
                   1052:                /* default encoding, no conversion should be needed */
                   1053:                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1054:                return(0);
                   1055:            case XML_CHAR_ENCODING_UTF16LE:
                   1056:                break;
                   1057:            case XML_CHAR_ENCODING_UTF16BE:
                   1058:                break;
                   1059:            case XML_CHAR_ENCODING_UCS4LE:
                   1060:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1061:                               "encoding not supported %s\n",
                   1062:                               BAD_CAST "USC4 little endian", NULL);
                   1063:                break;
                   1064:            case XML_CHAR_ENCODING_UCS4BE:
                   1065:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1066:                               "encoding not supported %s\n",
                   1067:                               BAD_CAST "USC4 big endian", NULL);
                   1068:                break;
                   1069:            case XML_CHAR_ENCODING_EBCDIC:
                   1070:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1071:                               "encoding not supported %s\n",
                   1072:                               BAD_CAST "EBCDIC", NULL);
                   1073:                break;
                   1074:            case XML_CHAR_ENCODING_UCS4_2143:
                   1075:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1076:                               "encoding not supported %s\n",
                   1077:                               BAD_CAST "UCS4 2143", NULL);
                   1078:                break;
                   1079:            case XML_CHAR_ENCODING_UCS4_3412:
                   1080:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1081:                               "encoding not supported %s\n",
                   1082:                               BAD_CAST "UCS4 3412", NULL);
                   1083:                break;
                   1084:            case XML_CHAR_ENCODING_UCS2:
                   1085:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1086:                               "encoding not supported %s\n",
                   1087:                               BAD_CAST "UCS2", NULL);
                   1088:                break;
                   1089:            case XML_CHAR_ENCODING_8859_1:
                   1090:            case XML_CHAR_ENCODING_8859_2:
                   1091:            case XML_CHAR_ENCODING_8859_3:
                   1092:            case XML_CHAR_ENCODING_8859_4:
                   1093:            case XML_CHAR_ENCODING_8859_5:
                   1094:            case XML_CHAR_ENCODING_8859_6:
                   1095:            case XML_CHAR_ENCODING_8859_7:
                   1096:            case XML_CHAR_ENCODING_8859_8:
                   1097:            case XML_CHAR_ENCODING_8859_9:
                   1098:                /*
                   1099:                 * We used to keep the internal content in the
                   1100:                 * document encoding however this turns being unmaintainable
                   1101:                 * So xmlGetCharEncodingHandler() will return non-null
                   1102:                 * values for this now.
                   1103:                 */
                   1104:                if ((ctxt->inputNr == 1) &&
                   1105:                    (ctxt->encoding == NULL) &&
                   1106:                    (ctxt->input != NULL) &&
                   1107:                    (ctxt->input->encoding != NULL)) {
                   1108:                    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                   1109:                }
                   1110:                ctxt->charset = enc;
                   1111:                return(0);
                   1112:            case XML_CHAR_ENCODING_2022_JP:
                   1113:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1114:                               "encoding not supported %s\n",
                   1115:                               BAD_CAST "ISO-2022-JP", NULL);
                   1116:                break;
                   1117:            case XML_CHAR_ENCODING_SHIFT_JIS:
                   1118:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1119:                               "encoding not supported %s\n",
                   1120:                               BAD_CAST "Shift_JIS", NULL);
                   1121:                break;
                   1122:            case XML_CHAR_ENCODING_EUC_JP:
                   1123:                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                   1124:                               "encoding not supported %s\n",
                   1125:                               BAD_CAST "EUC-JP", NULL);
                   1126:                break;
                   1127:            default:
                   1128:                break;
                   1129:        }
                   1130:     }
                   1131:     if (handler == NULL)
                   1132:        return(-1);
                   1133:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1134:     return(xmlSwitchToEncodingInt(ctxt, handler, len));
                   1135: }
                   1136: 
                   1137: /**
                   1138:  * xmlSwitchInputEncoding:
                   1139:  * @ctxt:  the parser context
                   1140:  * @input:  the input stream
                   1141:  * @handler:  the encoding handler
                   1142:  * @len:  the number of bytes to convert for the first line or -1
                   1143:  *
                   1144:  * change the input functions when discovering the character encoding
                   1145:  * of a given entity.
                   1146:  *
                   1147:  * Returns 0 in case of success, -1 otherwise
                   1148:  */
                   1149: static int
                   1150: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                   1151:                           xmlCharEncodingHandlerPtr handler, int len)
                   1152: {
                   1153:     int nbchars;
                   1154: 
                   1155:     if (handler == NULL)
                   1156:         return (-1);
                   1157:     if (input == NULL)
                   1158:         return (-1);
                   1159:     if (input->buf != NULL) {
                   1160:         if (input->buf->encoder != NULL) {
                   1161:             /*
                   1162:              * Check in case the auto encoding detetection triggered
                   1163:              * in already.
                   1164:              */
                   1165:             if (input->buf->encoder == handler)
                   1166:                 return (0);
                   1167: 
                   1168:             /*
                   1169:              * "UTF-16" can be used for both LE and BE
                   1170:              if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
                   1171:              BAD_CAST "UTF-16", 6)) &&
                   1172:              (!xmlStrncmp(BAD_CAST handler->name,
                   1173:              BAD_CAST "UTF-16", 6))) {
                   1174:              return(0);
                   1175:              }
                   1176:              */
                   1177: 
                   1178:             /*
                   1179:              * Note: this is a bit dangerous, but that's what it
                   1180:              * takes to use nearly compatible signature for different
                   1181:              * encodings.
                   1182:              */
                   1183:             xmlCharEncCloseFunc(input->buf->encoder);
                   1184:             input->buf->encoder = handler;
                   1185:             return (0);
                   1186:         }
                   1187:         input->buf->encoder = handler;
                   1188: 
                   1189:         /*
                   1190:          * Is there already some content down the pipe to convert ?
                   1191:          */
                   1192:         if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
                   1193:             int processed;
                   1194:            unsigned int use;
                   1195: 
                   1196:             /*
                   1197:              * Specific handling of the Byte Order Mark for 
                   1198:              * UTF-16
                   1199:              */
                   1200:             if ((handler->name != NULL) &&
                   1201:                 (!strcmp(handler->name, "UTF-16LE") ||
                   1202:                  !strcmp(handler->name, "UTF-16")) &&
                   1203:                 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
                   1204:                 input->cur += 2;
                   1205:             }
                   1206:             if ((handler->name != NULL) &&
                   1207:                 (!strcmp(handler->name, "UTF-16BE")) &&
                   1208:                 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
                   1209:                 input->cur += 2;
                   1210:             }
                   1211:             /*
                   1212:              * Errata on XML-1.0 June 20 2001
                   1213:              * Specific handling of the Byte Order Mark for
                   1214:              * UTF-8
                   1215:              */
                   1216:             if ((handler->name != NULL) &&
                   1217:                 (!strcmp(handler->name, "UTF-8")) &&
                   1218:                 (input->cur[0] == 0xEF) &&
                   1219:                 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
                   1220:                 input->cur += 3;
                   1221:             }
                   1222: 
                   1223:             /*
                   1224:              * Shrink the current input buffer.
                   1225:              * Move it as the raw buffer and create a new input buffer
                   1226:              */
                   1227:             processed = input->cur - input->base;
                   1228:             xmlBufferShrink(input->buf->buffer, processed);
                   1229:             input->buf->raw = input->buf->buffer;
                   1230:             input->buf->buffer = xmlBufferCreate();
                   1231:            input->buf->rawconsumed = processed;
                   1232:            use = input->buf->raw->use;
                   1233: 
                   1234:             if (ctxt->html) {
                   1235:                 /*
                   1236:                  * convert as much as possible of the buffer
                   1237:                  */
                   1238:                 nbchars = xmlCharEncInFunc(input->buf->encoder,
                   1239:                                            input->buf->buffer,
                   1240:                                            input->buf->raw);
                   1241:             } else {
                   1242:                 /*
                   1243:                  * convert just enough to get
                   1244:                  * '<?xml version="1.0" encoding="xxx"?>'
                   1245:                  * parsed with the autodetected encoding
                   1246:                  * into the parser reading buffer.
                   1247:                  */
                   1248:                 nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
                   1249:                                                  input->buf->buffer,
                   1250:                                                  input->buf->raw,
                   1251:                                                  len);
                   1252:             }
                   1253:             if (nbchars < 0) {
                   1254:                 xmlErrInternal(ctxt,
                   1255:                                "switching encoding: encoder error\n",
                   1256:                                NULL);
                   1257:                 return (-1);
                   1258:             }
                   1259:            input->buf->rawconsumed += use - input->buf->raw->use;
                   1260:             input->base = input->cur = input->buf->buffer->content;
                   1261:             input->end = &input->base[input->buf->buffer->use];
                   1262: 
                   1263:         }
                   1264:         return (0);
                   1265:     } else if (input->length == 0) {
                   1266:        /*
                   1267:         * When parsing a static memory array one must know the
                   1268:         * size to be able to convert the buffer.
                   1269:         */
                   1270:        xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
                   1271:        return (-1);
                   1272:     }
                   1273:     return (0);
                   1274: }
                   1275: 
                   1276: /**
                   1277:  * xmlSwitchInputEncoding:
                   1278:  * @ctxt:  the parser context
                   1279:  * @input:  the input stream
                   1280:  * @handler:  the encoding handler
                   1281:  *
                   1282:  * change the input functions when discovering the character encoding
                   1283:  * of a given entity.
                   1284:  *
                   1285:  * Returns 0 in case of success, -1 otherwise
                   1286:  */
                   1287: int
                   1288: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                   1289:                           xmlCharEncodingHandlerPtr handler) {
                   1290:     return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
                   1291: }
                   1292: 
                   1293: /**
                   1294:  * xmlSwitchToEncodingInt:
                   1295:  * @ctxt:  the parser context
                   1296:  * @handler:  the encoding handler
                   1297:  * @len: the lenght to convert or -1
                   1298:  *
                   1299:  * change the input functions when discovering the character encoding
                   1300:  * of a given entity, and convert only @len bytes of the output, this
                   1301:  * is needed on auto detect to allows any declared encoding later to
                   1302:  * convert the actual content after the xmlDecl
                   1303:  *
                   1304:  * Returns 0 in case of success, -1 otherwise
                   1305:  */
                   1306: static int
                   1307: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                   1308:                        xmlCharEncodingHandlerPtr handler, int len) {
                   1309:     int ret = 0;
                   1310: 
                   1311:     if (handler != NULL) {
                   1312:         if (ctxt->input != NULL) {
                   1313:            ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
                   1314:        } else {
                   1315:            xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
                   1316:                           NULL);
                   1317:            return(-1);
                   1318:        }
                   1319:        /*
                   1320:         * The parsing is now done in UTF8 natively
                   1321:         */
                   1322:        ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1323:     } else
                   1324:        return(-1);
                   1325:     return(ret);
                   1326: }
                   1327: 
                   1328: /**
                   1329:  * xmlSwitchToEncoding:
                   1330:  * @ctxt:  the parser context
                   1331:  * @handler:  the encoding handler
                   1332:  *
                   1333:  * change the input functions when discovering the character encoding
                   1334:  * of a given entity.
                   1335:  *
                   1336:  * Returns 0 in case of success, -1 otherwise
                   1337:  */
                   1338: int
                   1339: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
                   1340: {
                   1341:     return (xmlSwitchToEncodingInt(ctxt, handler, -1));
                   1342: }
                   1343: 
                   1344: /************************************************************************
                   1345:  *                                                                     *
                   1346:  *     Commodity functions to handle entities processing               *
                   1347:  *                                                                     *
                   1348:  ************************************************************************/
                   1349: 
                   1350: /**
                   1351:  * xmlFreeInputStream:
                   1352:  * @input:  an xmlParserInputPtr
                   1353:  *
                   1354:  * Free up an input stream.
                   1355:  */
                   1356: void
                   1357: xmlFreeInputStream(xmlParserInputPtr input) {
                   1358:     if (input == NULL) return;
                   1359: 
                   1360:     if (input->filename != NULL) xmlFree((char *) input->filename);
                   1361:     if (input->directory != NULL) xmlFree((char *) input->directory);
                   1362:     if (input->encoding != NULL) xmlFree((char *) input->encoding);
                   1363:     if (input->version != NULL) xmlFree((char *) input->version);
                   1364:     if ((input->free != NULL) && (input->base != NULL))
                   1365:         input->free((xmlChar *) input->base);
                   1366:     if (input->buf != NULL) 
                   1367:         xmlFreeParserInputBuffer(input->buf);
                   1368:     xmlFree(input);
                   1369: }
                   1370: 
                   1371: /**
                   1372:  * xmlNewInputStream:
                   1373:  * @ctxt:  an XML parser context
                   1374:  *
1.1.1.2 ! misho    1375:  * Create a new input stream structure.
        !          1376:  *
1.1       misho    1377:  * Returns the new input stream or NULL
                   1378:  */
                   1379: xmlParserInputPtr
                   1380: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
                   1381:     xmlParserInputPtr input;
                   1382: 
                   1383:     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
                   1384:     if (input == NULL) {
                   1385:         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
                   1386:        return(NULL);
                   1387:     }
                   1388:     memset(input, 0, sizeof(xmlParserInput));
                   1389:     input->line = 1;
                   1390:     input->col = 1;
                   1391:     input->standalone = -1;
1.1.1.2 ! misho    1392: 
1.1       misho    1393:     /*
1.1.1.2 ! misho    1394:      * If the context is NULL the id cannot be initialized, but that
        !          1395:      * should not happen while parsing which is the situation where
        !          1396:      * the id is actually needed.
1.1       misho    1397:      */
1.1.1.2 ! misho    1398:     if (ctxt != NULL)
        !          1399:         input->id = ctxt->input_id++;
        !          1400: 
1.1       misho    1401:     return(input);
                   1402: }
                   1403: 
                   1404: /**
                   1405:  * xmlNewIOInputStream:
                   1406:  * @ctxt:  an XML parser context
                   1407:  * @input:  an I/O Input
                   1408:  * @enc:  the charset encoding if known
                   1409:  *
                   1410:  * Create a new input stream structure encapsulating the @input into
                   1411:  * a stream suitable for the parser.
                   1412:  *
                   1413:  * Returns the new input stream or NULL
                   1414:  */
                   1415: xmlParserInputPtr
                   1416: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
                   1417:                    xmlCharEncoding enc) {
                   1418:     xmlParserInputPtr inputStream;
                   1419: 
                   1420:     if (input == NULL) return(NULL);
                   1421:     if (xmlParserDebugEntities)
                   1422:        xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
                   1423:     inputStream = xmlNewInputStream(ctxt);
                   1424:     if (inputStream == NULL) {
                   1425:        return(NULL);
                   1426:     }
                   1427:     inputStream->filename = NULL;
                   1428:     inputStream->buf = input;
                   1429:     inputStream->base = inputStream->buf->buffer->content;
                   1430:     inputStream->cur = inputStream->buf->buffer->content;
                   1431:     inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
                   1432:     if (enc != XML_CHAR_ENCODING_NONE) {
                   1433:         xmlSwitchEncoding(ctxt, enc);
                   1434:     }
                   1435: 
                   1436:     return(inputStream);
                   1437: }
                   1438: 
                   1439: /**
                   1440:  * xmlNewEntityInputStream:
                   1441:  * @ctxt:  an XML parser context
                   1442:  * @entity:  an Entity pointer
                   1443:  *
                   1444:  * Create a new input stream based on an xmlEntityPtr
                   1445:  *
                   1446:  * Returns the new input stream or NULL
                   1447:  */
                   1448: xmlParserInputPtr
                   1449: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
                   1450:     xmlParserInputPtr input;
                   1451: 
                   1452:     if (entity == NULL) {
                   1453:         xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
                   1454:                       NULL);
                   1455:        return(NULL);
                   1456:     }
                   1457:     if (xmlParserDebugEntities)
                   1458:        xmlGenericError(xmlGenericErrorContext,
                   1459:                "new input from entity: %s\n", entity->name);
                   1460:     if (entity->content == NULL) {
                   1461:        switch (entity->etype) {
                   1462:             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
                   1463:                xmlErrInternal(ctxt, "Cannot parse entity %s\n",
                   1464:                               entity->name);
                   1465:                 break;
                   1466:             case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
                   1467:             case XML_EXTERNAL_PARAMETER_ENTITY:
                   1468:                return(xmlLoadExternalEntity((char *) entity->URI,
                   1469:                       (char *) entity->ExternalID, ctxt));
                   1470:             case XML_INTERNAL_GENERAL_ENTITY:
                   1471:                xmlErrInternal(ctxt,
                   1472:                      "Internal entity %s without content !\n",
                   1473:                               entity->name);
                   1474:                 break;
                   1475:             case XML_INTERNAL_PARAMETER_ENTITY:
                   1476:                xmlErrInternal(ctxt,
                   1477:                      "Internal parameter entity %s without content !\n",
                   1478:                               entity->name);
                   1479:                 break;
                   1480:             case XML_INTERNAL_PREDEFINED_ENTITY:
                   1481:                xmlErrInternal(ctxt,
                   1482:                      "Predefined entity %s without content !\n",
                   1483:                               entity->name);
                   1484:                 break;
                   1485:        }
                   1486:        return(NULL);
                   1487:     }
                   1488:     input = xmlNewInputStream(ctxt);
                   1489:     if (input == NULL) {
                   1490:        return(NULL);
                   1491:     }
                   1492:     if (entity->URI != NULL)
                   1493:        input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
                   1494:     input->base = entity->content;
                   1495:     input->cur = entity->content;
                   1496:     input->length = entity->length;
                   1497:     input->end = &entity->content[input->length];
                   1498:     return(input);
                   1499: }
                   1500: 
                   1501: /**
                   1502:  * xmlNewStringInputStream:
                   1503:  * @ctxt:  an XML parser context
                   1504:  * @buffer:  an memory buffer
                   1505:  *
                   1506:  * Create a new input stream based on a memory buffer.
                   1507:  * Returns the new input stream
                   1508:  */
                   1509: xmlParserInputPtr
                   1510: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
                   1511:     xmlParserInputPtr input;
                   1512: 
                   1513:     if (buffer == NULL) {
                   1514:         xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
                   1515:                       NULL);
                   1516:        return(NULL);
                   1517:     }
                   1518:     if (xmlParserDebugEntities)
                   1519:        xmlGenericError(xmlGenericErrorContext,
                   1520:                "new fixed input: %.30s\n", buffer);
                   1521:     input = xmlNewInputStream(ctxt);
                   1522:     if (input == NULL) {
                   1523:         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
                   1524:        return(NULL);
                   1525:     }
                   1526:     input->base = buffer;
                   1527:     input->cur = buffer;
                   1528:     input->length = xmlStrlen(buffer);
                   1529:     input->end = &buffer[input->length];
                   1530:     return(input);
                   1531: }
                   1532: 
                   1533: /**
                   1534:  * xmlNewInputFromFile:
                   1535:  * @ctxt:  an XML parser context
                   1536:  * @filename:  the filename to use as entity
                   1537:  *
                   1538:  * Create a new input stream based on a file or an URL.
                   1539:  *
                   1540:  * Returns the new input stream or NULL in case of error
                   1541:  */
                   1542: xmlParserInputPtr
                   1543: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
                   1544:     xmlParserInputBufferPtr buf;
                   1545:     xmlParserInputPtr inputStream;
                   1546:     char *directory = NULL;
                   1547:     xmlChar *URI = NULL;
                   1548: 
                   1549:     if (xmlParserDebugEntities)
                   1550:        xmlGenericError(xmlGenericErrorContext,
                   1551:                "new input from file: %s\n", filename);
                   1552:     if (ctxt == NULL) return(NULL);
                   1553:     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
                   1554:     if (buf == NULL) {
                   1555:        if (filename == NULL)
                   1556:            __xmlLoaderErr(ctxt,
                   1557:                           "failed to load external entity: NULL filename \n",
                   1558:                           NULL);
                   1559:        else
                   1560:            __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
                   1561:                           (const char *) filename);
                   1562:        return(NULL);
                   1563:     }
                   1564: 
                   1565:     inputStream = xmlNewInputStream(ctxt);
                   1566:     if (inputStream == NULL)
                   1567:        return(NULL);
                   1568: 
                   1569:     inputStream->buf = buf;
                   1570:     inputStream = xmlCheckHTTPInput(ctxt, inputStream);
                   1571:     if (inputStream == NULL)
                   1572:         return(NULL);
                   1573:     
                   1574:     if (inputStream->filename == NULL)
                   1575:        URI = xmlStrdup((xmlChar *) filename);
                   1576:     else
                   1577:        URI = xmlStrdup((xmlChar *) inputStream->filename);
                   1578:     directory = xmlParserGetDirectory((const char *) URI);
                   1579:     if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
                   1580:     inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
                   1581:     if (URI != NULL) xmlFree((char *) URI);
                   1582:     inputStream->directory = directory;
                   1583: 
                   1584:     inputStream->base = inputStream->buf->buffer->content;
                   1585:     inputStream->cur = inputStream->buf->buffer->content;
                   1586:     inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
                   1587:     if ((ctxt->directory == NULL) && (directory != NULL))
                   1588:         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
                   1589:     return(inputStream);
                   1590: }
                   1591: 
                   1592: /************************************************************************
                   1593:  *                                                                     *
                   1594:  *             Commodity functions to handle parser contexts           *
                   1595:  *                                                                     *
                   1596:  ************************************************************************/
                   1597: 
                   1598: /**
                   1599:  * xmlInitParserCtxt:
                   1600:  * @ctxt:  an XML parser context
                   1601:  *
                   1602:  * Initialize a parser context
                   1603:  *
                   1604:  * Returns 0 in case of success and -1 in case of error
                   1605:  */
                   1606: 
                   1607: int
                   1608: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
                   1609: {
                   1610:     xmlParserInputPtr input;
                   1611: 
                   1612:     if(ctxt==NULL) {
                   1613:         xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
                   1614:         return(-1);
                   1615:     }
                   1616: 
                   1617:     xmlDefaultSAXHandlerInit();
                   1618: 
                   1619:     if (ctxt->dict == NULL)
                   1620:        ctxt->dict = xmlDictCreate();
                   1621:     if (ctxt->dict == NULL) {
                   1622:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1623:        return(-1);
                   1624:     }
                   1625:     if (ctxt->sax == NULL)
                   1626:        ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
                   1627:     if (ctxt->sax == NULL) {
                   1628:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1629:        return(-1);
                   1630:     }
                   1631:     else
                   1632:         xmlSAXVersion(ctxt->sax, 2);
                   1633: 
                   1634:     ctxt->maxatts = 0;
                   1635:     ctxt->atts = NULL;
                   1636:     /* Allocate the Input stack */
                   1637:     if (ctxt->inputTab == NULL) {
                   1638:        ctxt->inputTab = (xmlParserInputPtr *)
                   1639:                    xmlMalloc(5 * sizeof(xmlParserInputPtr));
                   1640:        ctxt->inputMax = 5;
                   1641:     }
                   1642:     if (ctxt->inputTab == NULL) {
                   1643:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1644:        ctxt->inputNr = 0;
                   1645:        ctxt->inputMax = 0;
                   1646:        ctxt->input = NULL;
                   1647:        return(-1);
                   1648:     }
                   1649:     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
                   1650:         xmlFreeInputStream(input);
                   1651:     }
                   1652:     ctxt->inputNr = 0;
                   1653:     ctxt->input = NULL;
                   1654: 
                   1655:     ctxt->version = NULL;
                   1656:     ctxt->encoding = NULL;
                   1657:     ctxt->standalone = -1;
                   1658:     ctxt->hasExternalSubset = 0;
                   1659:     ctxt->hasPErefs = 0;
                   1660:     ctxt->html = 0;
                   1661:     ctxt->external = 0;
                   1662:     ctxt->instate = XML_PARSER_START;
                   1663:     ctxt->token = 0;
                   1664:     ctxt->directory = NULL;
                   1665: 
                   1666:     /* Allocate the Node stack */
                   1667:     if (ctxt->nodeTab == NULL) {
                   1668:        ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
                   1669:        ctxt->nodeMax = 10;
                   1670:     }
                   1671:     if (ctxt->nodeTab == NULL) {
                   1672:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1673:        ctxt->nodeNr = 0;
                   1674:        ctxt->nodeMax = 0;
                   1675:        ctxt->node = NULL;
                   1676:        ctxt->inputNr = 0;
                   1677:        ctxt->inputMax = 0;
                   1678:        ctxt->input = NULL;
                   1679:        return(-1);
                   1680:     }
                   1681:     ctxt->nodeNr = 0;
                   1682:     ctxt->node = NULL;
                   1683: 
                   1684:     /* Allocate the Name stack */
                   1685:     if (ctxt->nameTab == NULL) {
                   1686:        ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
                   1687:        ctxt->nameMax = 10;
                   1688:     }
                   1689:     if (ctxt->nameTab == NULL) {
                   1690:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1691:        ctxt->nodeNr = 0;
                   1692:        ctxt->nodeMax = 0;
                   1693:        ctxt->node = NULL;
                   1694:        ctxt->inputNr = 0;
                   1695:        ctxt->inputMax = 0;
                   1696:        ctxt->input = NULL;
                   1697:        ctxt->nameNr = 0;
                   1698:        ctxt->nameMax = 0;
                   1699:        ctxt->name = NULL;
                   1700:        return(-1);
                   1701:     }
                   1702:     ctxt->nameNr = 0;
                   1703:     ctxt->name = NULL;
                   1704: 
                   1705:     /* Allocate the space stack */
                   1706:     if (ctxt->spaceTab == NULL) {
                   1707:        ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
                   1708:        ctxt->spaceMax = 10;
                   1709:     }
                   1710:     if (ctxt->spaceTab == NULL) {
                   1711:         xmlErrMemory(NULL, "cannot initialize parser context\n");
                   1712:        ctxt->nodeNr = 0;
                   1713:        ctxt->nodeMax = 0;
                   1714:        ctxt->node = NULL;
                   1715:        ctxt->inputNr = 0;
                   1716:        ctxt->inputMax = 0;
                   1717:        ctxt->input = NULL;
                   1718:        ctxt->nameNr = 0;
                   1719:        ctxt->nameMax = 0;
                   1720:        ctxt->name = NULL;
                   1721:        ctxt->spaceNr = 0;
                   1722:        ctxt->spaceMax = 0;
                   1723:        ctxt->space = NULL;
                   1724:        return(-1);
                   1725:     }
                   1726:     ctxt->spaceNr = 1;
                   1727:     ctxt->spaceMax = 10;
                   1728:     ctxt->spaceTab[0] = -1;
                   1729:     ctxt->space = &ctxt->spaceTab[0];
                   1730:     ctxt->userData = ctxt;
                   1731:     ctxt->myDoc = NULL;
                   1732:     ctxt->wellFormed = 1;
                   1733:     ctxt->nsWellFormed = 1;
                   1734:     ctxt->valid = 1;
                   1735:     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
                   1736:     ctxt->validate = xmlDoValidityCheckingDefaultValue;
                   1737:     ctxt->pedantic = xmlPedanticParserDefaultValue;
                   1738:     ctxt->linenumbers = xmlLineNumbersDefaultValue;
                   1739:     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
                   1740:     if (ctxt->keepBlanks == 0)
                   1741:        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
                   1742: 
                   1743:     ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
                   1744:     ctxt->vctxt.userData = ctxt;
                   1745:     ctxt->vctxt.error = xmlParserValidityError;
                   1746:     ctxt->vctxt.warning = xmlParserValidityWarning;
                   1747:     if (ctxt->validate) {
                   1748:        if (xmlGetWarningsDefaultValue == 0)
                   1749:            ctxt->vctxt.warning = NULL;
                   1750:        else
                   1751:            ctxt->vctxt.warning = xmlParserValidityWarning;
                   1752:        ctxt->vctxt.nodeMax = 0;
                   1753:     }
                   1754:     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
                   1755:     ctxt->record_info = 0;
                   1756:     ctxt->nbChars = 0;
                   1757:     ctxt->checkIndex = 0;
                   1758:     ctxt->inSubset = 0;
                   1759:     ctxt->errNo = XML_ERR_OK;
                   1760:     ctxt->depth = 0;
                   1761:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   1762:     ctxt->catalogs = NULL;
                   1763:     ctxt->nbentities = 0;
1.1.1.2 ! misho    1764:     ctxt->input_id = 1;
1.1       misho    1765:     xmlInitNodeInfoSeq(&ctxt->node_seq);
                   1766:     return(0);
                   1767: }
                   1768: 
                   1769: /**
                   1770:  * xmlFreeParserCtxt:
                   1771:  * @ctxt:  an XML parser context
                   1772:  *
                   1773:  * Free all the memory used by a parser context. However the parsed
                   1774:  * document in ctxt->myDoc is not freed.
                   1775:  */
                   1776: 
                   1777: void
                   1778: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
                   1779: {
                   1780:     xmlParserInputPtr input;
                   1781: 
                   1782:     if (ctxt == NULL) return;
                   1783: 
                   1784:     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
                   1785:         xmlFreeInputStream(input);
                   1786:     }
                   1787:     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
                   1788:     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
                   1789:     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
                   1790:     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
                   1791:     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
                   1792:     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
                   1793:     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
                   1794:     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
                   1795:     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
                   1796: #ifdef LIBXML_SAX1_ENABLED
                   1797:     if ((ctxt->sax != NULL) &&
                   1798:         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
                   1799: #else
                   1800:     if (ctxt->sax != NULL)
                   1801: #endif /* LIBXML_SAX1_ENABLED */
                   1802:         xmlFree(ctxt->sax);
                   1803:     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
                   1804:     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
                   1805:     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
                   1806:     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
                   1807:     if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
                   1808:     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
                   1809:     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
                   1810:     if (ctxt->attsDefault != NULL) 
                   1811:         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
                   1812:     if (ctxt->attsSpecial != NULL)
                   1813:         xmlHashFree(ctxt->attsSpecial, NULL);
                   1814:     if (ctxt->freeElems != NULL) {
                   1815:         xmlNodePtr cur, next;
                   1816: 
                   1817:        cur = ctxt->freeElems;
                   1818:        while (cur != NULL) {
                   1819:            next = cur->next;
                   1820:            xmlFree(cur);
                   1821:            cur = next;
                   1822:        }
                   1823:     }
                   1824:     if (ctxt->freeAttrs != NULL) {
                   1825:         xmlAttrPtr cur, next;
                   1826: 
                   1827:        cur = ctxt->freeAttrs;
                   1828:        while (cur != NULL) {
                   1829:            next = cur->next;
                   1830:            xmlFree(cur);
                   1831:            cur = next;
                   1832:        }
                   1833:     }
                   1834:     /*
                   1835:      * cleanup the error strings
                   1836:      */
                   1837:     if (ctxt->lastError.message != NULL)
                   1838:         xmlFree(ctxt->lastError.message);
                   1839:     if (ctxt->lastError.file != NULL)
                   1840:         xmlFree(ctxt->lastError.file);
                   1841:     if (ctxt->lastError.str1 != NULL)
                   1842:         xmlFree(ctxt->lastError.str1);
                   1843:     if (ctxt->lastError.str2 != NULL)
                   1844:         xmlFree(ctxt->lastError.str2);
                   1845:     if (ctxt->lastError.str3 != NULL)
                   1846:         xmlFree(ctxt->lastError.str3);
                   1847: 
                   1848: #ifdef LIBXML_CATALOG_ENABLED
                   1849:     if (ctxt->catalogs != NULL)
                   1850:        xmlCatalogFreeLocal(ctxt->catalogs);
                   1851: #endif
                   1852:     xmlFree(ctxt);
                   1853: }
                   1854: 
                   1855: /**
                   1856:  * xmlNewParserCtxt:
                   1857:  *
                   1858:  * Allocate and initialize a new parser context.
                   1859:  *
                   1860:  * Returns the xmlParserCtxtPtr or NULL
                   1861:  */
                   1862: 
                   1863: xmlParserCtxtPtr
                   1864: xmlNewParserCtxt(void)
                   1865: {
                   1866:     xmlParserCtxtPtr ctxt;
                   1867: 
                   1868:     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
                   1869:     if (ctxt == NULL) {
                   1870:        xmlErrMemory(NULL, "cannot allocate parser context\n");
                   1871:        return(NULL);
                   1872:     }
                   1873:     memset(ctxt, 0, sizeof(xmlParserCtxt));
                   1874:     if (xmlInitParserCtxt(ctxt) < 0) {
                   1875:         xmlFreeParserCtxt(ctxt);
                   1876:        return(NULL);
                   1877:     }
                   1878:     return(ctxt);
                   1879: }
                   1880: 
                   1881: /************************************************************************
                   1882:  *                                                                     *
                   1883:  *             Handling of node information                            *
                   1884:  *                                                                     *
                   1885:  ************************************************************************/
                   1886: 
                   1887: /**
                   1888:  * xmlClearParserCtxt:
                   1889:  * @ctxt:  an XML parser context
                   1890:  *
                   1891:  * Clear (release owned resources) and reinitialize a parser context
                   1892:  */
                   1893: 
                   1894: void
                   1895: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
                   1896: {
                   1897:   if (ctxt==NULL)
                   1898:     return;
                   1899:   xmlClearNodeInfoSeq(&ctxt->node_seq);
                   1900:   xmlCtxtReset(ctxt);
                   1901: }
                   1902: 
                   1903: 
                   1904: /**
                   1905:  * xmlParserFindNodeInfo:
                   1906:  * @ctx:  an XML parser context
                   1907:  * @node:  an XML node within the tree
                   1908:  *
                   1909:  * Find the parser node info struct for a given node
                   1910:  * 
                   1911:  * Returns an xmlParserNodeInfo block pointer or NULL
                   1912:  */
                   1913: const xmlParserNodeInfo *
                   1914: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
                   1915: {
                   1916:     unsigned long pos;
                   1917: 
                   1918:     if ((ctx == NULL) || (node == NULL))
                   1919:         return (NULL);
                   1920:     /* Find position where node should be at */
                   1921:     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
                   1922:     if (pos < ctx->node_seq.length
                   1923:         && ctx->node_seq.buffer[pos].node == node)
                   1924:         return &ctx->node_seq.buffer[pos];
                   1925:     else
                   1926:         return NULL;
                   1927: }
                   1928: 
                   1929: 
                   1930: /**
                   1931:  * xmlInitNodeInfoSeq:
                   1932:  * @seq:  a node info sequence pointer
                   1933:  *
                   1934:  * -- Initialize (set to initial state) node info sequence
                   1935:  */
                   1936: void
                   1937: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
                   1938: {
                   1939:     if (seq == NULL)
                   1940:         return;
                   1941:     seq->length = 0;
                   1942:     seq->maximum = 0;
                   1943:     seq->buffer = NULL;
                   1944: }
                   1945: 
                   1946: /**
                   1947:  * xmlClearNodeInfoSeq:
                   1948:  * @seq:  a node info sequence pointer
                   1949:  *
                   1950:  * -- Clear (release memory and reinitialize) node
                   1951:  *   info sequence
                   1952:  */
                   1953: void
                   1954: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
                   1955: {
                   1956:     if (seq == NULL)
                   1957:         return;
                   1958:     if (seq->buffer != NULL)
                   1959:         xmlFree(seq->buffer);
                   1960:     xmlInitNodeInfoSeq(seq);
                   1961: }
                   1962: 
                   1963: /**
                   1964:  * xmlParserFindNodeInfoIndex:
                   1965:  * @seq:  a node info sequence pointer
                   1966:  * @node:  an XML node pointer
                   1967:  *
                   1968:  * 
                   1969:  * xmlParserFindNodeInfoIndex : Find the index that the info record for
                   1970:  *   the given node is or should be at in a sorted sequence
                   1971:  *
                   1972:  * Returns a long indicating the position of the record
                   1973:  */
                   1974: unsigned long
                   1975: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
                   1976:                            const xmlNodePtr node)
                   1977: {
                   1978:     unsigned long upper, lower, middle;
                   1979:     int found = 0;
                   1980: 
                   1981:     if ((seq == NULL) || (node == NULL))
                   1982:         return ((unsigned long) -1);
                   1983: 
                   1984:     /* Do a binary search for the key */
                   1985:     lower = 1;
                   1986:     upper = seq->length;
                   1987:     middle = 0;
                   1988:     while (lower <= upper && !found) {
                   1989:         middle = lower + (upper - lower) / 2;
                   1990:         if (node == seq->buffer[middle - 1].node)
                   1991:             found = 1;
                   1992:         else if (node < seq->buffer[middle - 1].node)
                   1993:             upper = middle - 1;
                   1994:         else
                   1995:             lower = middle + 1;
                   1996:     }
                   1997: 
                   1998:     /* Return position */
                   1999:     if (middle == 0 || seq->buffer[middle - 1].node < node)
                   2000:         return middle;
                   2001:     else
                   2002:         return middle - 1;
                   2003: }
                   2004: 
                   2005: 
                   2006: /**
                   2007:  * xmlParserAddNodeInfo:
                   2008:  * @ctxt:  an XML parser context
                   2009:  * @info:  a node info sequence pointer
                   2010:  *
                   2011:  * Insert node info record into the sorted sequence
                   2012:  */
                   2013: void
                   2014: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
                   2015:                      const xmlParserNodeInfoPtr info)
                   2016: {
                   2017:     unsigned long pos;
                   2018: 
                   2019:     if ((ctxt == NULL) || (info == NULL)) return;
                   2020: 
                   2021:     /* Find pos and check to see if node is already in the sequence */
                   2022:     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
                   2023:                                      info->node);
                   2024: 
                   2025:     if ((pos < ctxt->node_seq.length) && 
                   2026:         (ctxt->node_seq.buffer != NULL) &&
                   2027:         (ctxt->node_seq.buffer[pos].node == info->node)) {
                   2028:         ctxt->node_seq.buffer[pos] = *info;
                   2029:     }
                   2030: 
                   2031:     /* Otherwise, we need to add new node to buffer */
                   2032:     else {
                   2033:         if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
                   2034:             xmlParserNodeInfo *tmp_buffer;
                   2035:             unsigned int byte_size;
                   2036: 
                   2037:             if (ctxt->node_seq.maximum == 0)
                   2038:                 ctxt->node_seq.maximum = 2;
                   2039:             byte_size = (sizeof(*ctxt->node_seq.buffer) *
                   2040:                        (2 * ctxt->node_seq.maximum));
                   2041: 
                   2042:             if (ctxt->node_seq.buffer == NULL)
                   2043:                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
                   2044:             else
                   2045:                 tmp_buffer =
                   2046:                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
                   2047:                                                      byte_size);
                   2048: 
                   2049:             if (tmp_buffer == NULL) {
                   2050:                xmlErrMemory(ctxt, "failed to allocate buffer\n");
                   2051:                 return;
                   2052:             }
                   2053:             ctxt->node_seq.buffer = tmp_buffer;
                   2054:             ctxt->node_seq.maximum *= 2;
                   2055:         }
                   2056: 
                   2057:         /* If position is not at end, move elements out of the way */
                   2058:         if (pos != ctxt->node_seq.length) {
                   2059:             unsigned long i;
                   2060: 
                   2061:             for (i = ctxt->node_seq.length; i > pos; i--)
                   2062:                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
                   2063:         }
                   2064: 
                   2065:         /* Copy element and increase length */
                   2066:         ctxt->node_seq.buffer[pos] = *info;
                   2067:         ctxt->node_seq.length++;
                   2068:     }
                   2069: }
                   2070: 
                   2071: /************************************************************************
                   2072:  *                                                                     *
                   2073:  *             Default settings                                        *
                   2074:  *                                                                     *
                   2075:  ************************************************************************/
                   2076: /**
                   2077:  * xmlPedanticParserDefault:
                   2078:  * @val:  int 0 or 1 
                   2079:  *
                   2080:  * Set and return the previous value for enabling pedantic warnings.
                   2081:  *
                   2082:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2083:  */
                   2084: 
                   2085: int
                   2086: xmlPedanticParserDefault(int val) {
                   2087:     int old = xmlPedanticParserDefaultValue;
                   2088: 
                   2089:     xmlPedanticParserDefaultValue = val;
                   2090:     return(old);
                   2091: }
                   2092: 
                   2093: /**
                   2094:  * xmlLineNumbersDefault:
                   2095:  * @val:  int 0 or 1 
                   2096:  *
                   2097:  * Set and return the previous value for enabling line numbers in elements
                   2098:  * contents. This may break on old application and is turned off by default.
                   2099:  *
                   2100:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2101:  */
                   2102: 
                   2103: int
                   2104: xmlLineNumbersDefault(int val) {
                   2105:     int old = xmlLineNumbersDefaultValue;
                   2106: 
                   2107:     xmlLineNumbersDefaultValue = val;
                   2108:     return(old);
                   2109: }
                   2110: 
                   2111: /**
                   2112:  * xmlSubstituteEntitiesDefault:
                   2113:  * @val:  int 0 or 1 
                   2114:  *
                   2115:  * Set and return the previous value for default entity support.
                   2116:  * Initially the parser always keep entity references instead of substituting
                   2117:  * entity values in the output. This function has to be used to change the
                   2118:  * default parser behavior
                   2119:  * SAX::substituteEntities() has to be used for changing that on a file by
                   2120:  * file basis.
                   2121:  *
                   2122:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2123:  */
                   2124: 
                   2125: int
                   2126: xmlSubstituteEntitiesDefault(int val) {
                   2127:     int old = xmlSubstituteEntitiesDefaultValue;
                   2128: 
                   2129:     xmlSubstituteEntitiesDefaultValue = val;
                   2130:     return(old);
                   2131: }
                   2132: 
                   2133: /**
                   2134:  * xmlKeepBlanksDefault:
                   2135:  * @val:  int 0 or 1 
                   2136:  *
                   2137:  * Set and return the previous value for default blanks text nodes support.
                   2138:  * The 1.x version of the parser used an heuristic to try to detect
                   2139:  * ignorable white spaces. As a result the SAX callback was generating
                   2140:  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
                   2141:  * using the DOM output text nodes containing those blanks were not generated.
                   2142:  * The 2.x and later version will switch to the XML standard way and
                   2143:  * ignorableWhitespace() are only generated when running the parser in
                   2144:  * validating mode and when the current element doesn't allow CDATA or
                   2145:  * mixed content.
                   2146:  * This function is provided as a way to force the standard behavior 
                   2147:  * on 1.X libs and to switch back to the old mode for compatibility when
                   2148:  * running 1.X client code on 2.X . Upgrade of 1.X code should be done
                   2149:  * by using xmlIsBlankNode() commodity function to detect the "empty"
                   2150:  * nodes generated.
                   2151:  * This value also affect autogeneration of indentation when saving code
                   2152:  * if blanks sections are kept, indentation is not generated.
                   2153:  *
                   2154:  * Returns the last value for 0 for no substitution, 1 for substitution.
                   2155:  */
                   2156: 
                   2157: int
                   2158: xmlKeepBlanksDefault(int val) {
                   2159:     int old = xmlKeepBlanksDefaultValue;
                   2160: 
                   2161:     xmlKeepBlanksDefaultValue = val;
                   2162:     if (!val) xmlIndentTreeOutput = 1;
                   2163:     return(old);
                   2164: }
                   2165: 
                   2166: #define bottom_parserInternals
                   2167: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>