File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parserInternals.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:29 2014 UTC (9 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD
libxml2 2.9.1

    1: /*
    2:  * parserInternals.c : Internal routines (and obsolete ones) needed for the
    3:  *                     XML and HTML parsers.
    4:  *
    5:  * See Copyright for the status of this software.
    6:  *
    7:  * daniel@veillard.com
    8:  */
    9: 
   10: #define IN_LIBXML
   11: #include "libxml.h"
   12: 
   13: #if defined(WIN32) && !defined (__CYGWIN__)
   14: #define XML_DIR_SEP '\\'
   15: #else
   16: #define XML_DIR_SEP '/'
   17: #endif
   18: 
   19: #include <string.h>
   20: #ifdef HAVE_CTYPE_H
   21: #include <ctype.h>
   22: #endif
   23: #ifdef HAVE_STDLIB_H
   24: #include <stdlib.h>
   25: #endif
   26: #ifdef HAVE_SYS_STAT_H
   27: #include <sys/stat.h>
   28: #endif
   29: #ifdef HAVE_FCNTL_H
   30: #include <fcntl.h>
   31: #endif
   32: #ifdef HAVE_UNISTD_H
   33: #include <unistd.h>
   34: #endif
   35: #ifdef HAVE_ZLIB_H
   36: #include <zlib.h>
   37: #endif
   38: 
   39: #include <libxml/xmlmemory.h>
   40: #include <libxml/tree.h>
   41: #include <libxml/parser.h>
   42: #include <libxml/parserInternals.h>
   43: #include <libxml/valid.h>
   44: #include <libxml/entities.h>
   45: #include <libxml/xmlerror.h>
   46: #include <libxml/encoding.h>
   47: #include <libxml/valid.h>
   48: #include <libxml/xmlIO.h>
   49: #include <libxml/uri.h>
   50: #include <libxml/dict.h>
   51: #include <libxml/SAX.h>
   52: #ifdef LIBXML_CATALOG_ENABLED
   53: #include <libxml/catalog.h>
   54: #endif
   55: #include <libxml/globals.h>
   56: #include <libxml/chvalid.h>
   57: 
   58: #include "buf.h"
   59: #include "enc.h"
   60: 
   61: /*
   62:  * Various global defaults for parsing
   63:  */
   64: 
   65: /**
   66:  * xmlCheckVersion:
   67:  * @version: the include version number
   68:  *
   69:  * check the compiled lib version against the include one.
   70:  * This can warn or immediately kill the application
   71:  */
   72: void
   73: xmlCheckVersion(int version) {
   74:     int myversion = (int) LIBXML_VERSION;
   75: 
   76:     xmlInitParser();
   77: 
   78:     if ((myversion / 10000) != (version / 10000)) {
   79: 	xmlGenericError(xmlGenericErrorContext,
   80: 		"Fatal: program compiled against libxml %d using libxml %d\n",
   81: 		(version / 10000), (myversion / 10000));
   82: 	fprintf(stderr,
   83: 		"Fatal: program compiled against libxml %d using libxml %d\n",
   84: 		(version / 10000), (myversion / 10000));
   85:     }
   86:     if ((myversion / 100) < (version / 100)) {
   87: 	xmlGenericError(xmlGenericErrorContext,
   88: 		"Warning: program compiled against libxml %d using older %d\n",
   89: 		(version / 100), (myversion / 100));
   90:     }
   91: }
   92: 
   93: 
   94: /************************************************************************
   95:  *									*
   96:  *		Some factorized error routines				*
   97:  *									*
   98:  ************************************************************************/
   99: 
  100: 
  101: /**
  102:  * xmlErrMemory:
  103:  * @ctxt:  an XML parser context
  104:  * @extra:  extra informations
  105:  *
  106:  * Handle a redefinition of attribute error
  107:  */
  108: void
  109: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
  110: {
  111:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  112:         (ctxt->instate == XML_PARSER_EOF))
  113: 	return;
  114:     if (ctxt != NULL) {
  115:         ctxt->errNo = XML_ERR_NO_MEMORY;
  116:         ctxt->instate = XML_PARSER_EOF;
  117:         ctxt->disableSAX = 1;
  118:     }
  119:     if (extra)
  120:         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  121:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
  122:                         NULL, NULL, 0, 0,
  123:                         "Memory allocation failed : %s\n", extra);
  124:     else
  125:         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  126:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
  127:                         NULL, NULL, 0, 0, "Memory allocation failed\n");
  128: }
  129: 
  130: /**
  131:  * __xmlErrEncoding:
  132:  * @ctxt:  an XML parser context
  133:  * @xmlerr:  the error number
  134:  * @msg:  the error message
  135:  * @str1:  an string info
  136:  * @str2:  an string info
  137:  *
  138:  * Handle an encoding error
  139:  */
  140: void
  141: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
  142:                  const char *msg, const xmlChar * str1, const xmlChar * str2)
  143: {
  144:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  145:         (ctxt->instate == XML_PARSER_EOF))
  146: 	return;
  147:     if (ctxt != NULL)
  148:         ctxt->errNo = xmlerr;
  149:     __xmlRaiseError(NULL, NULL, NULL,
  150:                     ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
  151:                     NULL, 0, (const char *) str1, (const char *) str2,
  152:                     NULL, 0, 0, msg, str1, str2);
  153:     if (ctxt != NULL) {
  154:         ctxt->wellFormed = 0;
  155:         if (ctxt->recovery == 0)
  156:             ctxt->disableSAX = 1;
  157:     }
  158: }
  159: 
  160: /**
  161:  * xmlErrInternal:
  162:  * @ctxt:  an XML parser context
  163:  * @msg:  the error message
  164:  * @str:  error informations
  165:  *
  166:  * Handle an internal error
  167:  */
  168: static void
  169: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
  170: {
  171:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  172:         (ctxt->instate == XML_PARSER_EOF))
  173: 	return;
  174:     if (ctxt != NULL)
  175:         ctxt->errNo = XML_ERR_INTERNAL_ERROR;
  176:     __xmlRaiseError(NULL, NULL, NULL,
  177:                     ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
  178:                     XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
  179:                     0, 0, msg, str);
  180:     if (ctxt != NULL) {
  181:         ctxt->wellFormed = 0;
  182:         if (ctxt->recovery == 0)
  183:             ctxt->disableSAX = 1;
  184:     }
  185: }
  186: 
  187: /**
  188:  * xmlErrEncodingInt:
  189:  * @ctxt:  an XML parser context
  190:  * @error:  the error number
  191:  * @msg:  the error message
  192:  * @val:  an integer value
  193:  *
  194:  * n encoding error
  195:  */
  196: static void
  197: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  198:                   const char *msg, int val)
  199: {
  200:     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  201:         (ctxt->instate == XML_PARSER_EOF))
  202: 	return;
  203:     if (ctxt != NULL)
  204:         ctxt->errNo = error;
  205:     __xmlRaiseError(NULL, NULL, NULL,
  206:                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  207:                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
  208:     if (ctxt != NULL) {
  209:         ctxt->wellFormed = 0;
  210:         if (ctxt->recovery == 0)
  211:             ctxt->disableSAX = 1;
  212:     }
  213: }
  214: 
  215: /**
  216:  * xmlIsLetter:
  217:  * @c:  an unicode character (int)
  218:  *
  219:  * Check whether the character is allowed by the production
  220:  * [84] Letter ::= BaseChar | Ideographic
  221:  *
  222:  * Returns 0 if not, non-zero otherwise
  223:  */
  224: int
  225: xmlIsLetter(int c) {
  226:     return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
  227: }
  228: 
  229: /************************************************************************
  230:  *									*
  231:  *		Input handling functions for progressive parsing	*
  232:  *									*
  233:  ************************************************************************/
  234: 
  235: /* #define DEBUG_INPUT */
  236: /* #define DEBUG_STACK */
  237: /* #define DEBUG_PUSH */
  238: 
  239: 
  240: /* we need to keep enough input to show errors in context */
  241: #define LINE_LEN        80
  242: 
  243: #ifdef DEBUG_INPUT
  244: #define CHECK_BUFFER(in) check_buffer(in)
  245: 
  246: static
  247: void check_buffer(xmlParserInputPtr in) {
  248:     if (in->base != xmlBufContent(in->buf->buffer)) {
  249:         xmlGenericError(xmlGenericErrorContext,
  250: 		"xmlParserInput: base mismatch problem\n");
  251:     }
  252:     if (in->cur < in->base) {
  253:         xmlGenericError(xmlGenericErrorContext,
  254: 		"xmlParserInput: cur < base problem\n");
  255:     }
  256:     if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
  257:         xmlGenericError(xmlGenericErrorContext,
  258: 		"xmlParserInput: cur > base + use problem\n");
  259:     }
  260:     xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
  261:             (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
  262: 	    xmlBufUse(in->buf->buffer));
  263: }
  264: 
  265: #else
  266: #define CHECK_BUFFER(in)
  267: #endif
  268: 
  269: 
  270: /**
  271:  * xmlParserInputRead:
  272:  * @in:  an XML parser input
  273:  * @len:  an indicative size for the lookahead
  274:  *
  275:  * This function was internal and is deprecated.
  276:  *
  277:  * Returns -1 as this is an error to use it.
  278:  */
  279: int
  280: xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
  281:     return(-1);
  282: }
  283: 
  284: /**
  285:  * xmlParserInputGrow:
  286:  * @in:  an XML parser input
  287:  * @len:  an indicative size for the lookahead
  288:  *
  289:  * This function increase the input for the parser. It tries to
  290:  * preserve pointers to the input buffer, and keep already read data
  291:  *
  292:  * Returns the amount of char read, or -1 in case of error, 0 indicate the
  293:  * end of this entity
  294:  */
  295: int
  296: xmlParserInputGrow(xmlParserInputPtr in, int len) {
  297:     size_t ret;
  298:     size_t indx;
  299:     const xmlChar *content;
  300: 
  301:     if ((in == NULL) || (len < 0)) return(-1);
  302: #ifdef DEBUG_INPUT
  303:     xmlGenericError(xmlGenericErrorContext, "Grow\n");
  304: #endif
  305:     if (in->buf == NULL) return(-1);
  306:     if (in->base == NULL) return(-1);
  307:     if (in->cur == NULL) return(-1);
  308:     if (in->buf->buffer == NULL) return(-1);
  309: 
  310:     CHECK_BUFFER(in);
  311: 
  312:     indx = in->cur - in->base;
  313:     if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
  314: 
  315: 	CHECK_BUFFER(in);
  316: 
  317:         return(0);
  318:     }
  319:     if (in->buf->readcallback != NULL) {
  320: 	ret = xmlParserInputBufferGrow(in->buf, len);
  321:     } else
  322:         return(0);
  323: 
  324:     /*
  325:      * NOTE : in->base may be a "dangling" i.e. freed pointer in this
  326:      *        block, but we use it really as an integer to do some
  327:      *        pointer arithmetic. Insure will raise it as a bug but in
  328:      *        that specific case, that's not !
  329:      */
  330: 
  331:     content = xmlBufContent(in->buf->buffer);
  332:     if (in->base != content) {
  333:         /*
  334: 	 * the buffer has been reallocated
  335: 	 */
  336: 	indx = in->cur - in->base;
  337: 	in->base = content;
  338: 	in->cur = &content[indx];
  339:     }
  340:     in->end = xmlBufEnd(in->buf->buffer);
  341: 
  342:     CHECK_BUFFER(in);
  343: 
  344:     return(ret);
  345: }
  346: 
  347: /**
  348:  * xmlParserInputShrink:
  349:  * @in:  an XML parser input
  350:  *
  351:  * This function removes used input for the parser.
  352:  */
  353: void
  354: xmlParserInputShrink(xmlParserInputPtr in) {
  355:     size_t used;
  356:     size_t ret;
  357:     size_t indx;
  358:     const xmlChar *content;
  359: 
  360: #ifdef DEBUG_INPUT
  361:     xmlGenericError(xmlGenericErrorContext, "Shrink\n");
  362: #endif
  363:     if (in == NULL) return;
  364:     if (in->buf == NULL) return;
  365:     if (in->base == NULL) return;
  366:     if (in->cur == NULL) return;
  367:     if (in->buf->buffer == NULL) return;
  368: 
  369:     CHECK_BUFFER(in);
  370: 
  371:     used = in->cur - xmlBufContent(in->buf->buffer);
  372:     /*
  373:      * Do not shrink on large buffers whose only a tiny fraction
  374:      * was consumed
  375:      */
  376:     if (used > INPUT_CHUNK) {
  377: 	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
  378: 	if (ret > 0) {
  379: 	    in->cur -= ret;
  380: 	    in->consumed += ret;
  381: 	}
  382: 	in->end = xmlBufEnd(in->buf->buffer);
  383:     }
  384: 
  385:     CHECK_BUFFER(in);
  386: 
  387:     if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
  388:         return;
  389:     }
  390:     xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
  391:     content = xmlBufContent(in->buf->buffer);
  392:     if (in->base != content) {
  393:         /*
  394: 	 * the buffer has been reallocated
  395: 	 */
  396: 	indx = in->cur - in->base;
  397: 	in->base = content;
  398: 	in->cur = &content[indx];
  399:     }
  400:     in->end = xmlBufEnd(in->buf->buffer);
  401: 
  402:     CHECK_BUFFER(in);
  403: }
  404: 
  405: /************************************************************************
  406:  *									*
  407:  *		UTF8 character input and related functions		*
  408:  *									*
  409:  ************************************************************************/
  410: 
  411: /**
  412:  * xmlNextChar:
  413:  * @ctxt:  the XML parser context
  414:  *
  415:  * Skip to the next char input char.
  416:  */
  417: 
  418: void
  419: xmlNextChar(xmlParserCtxtPtr ctxt)
  420: {
  421:     if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
  422:         (ctxt->input == NULL))
  423:         return;
  424: 
  425:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
  426:         if ((*ctxt->input->cur == 0) &&
  427:             (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
  428:             (ctxt->instate != XML_PARSER_COMMENT)) {
  429:             /*
  430:              * If we are at the end of the current entity and
  431:              * the context allows it, we pop consumed entities
  432:              * automatically.
  433:              * the auto closing should be blocked in other cases
  434:              */
  435:             xmlPopInput(ctxt);
  436:         } else {
  437:             const unsigned char *cur;
  438:             unsigned char c;
  439: 
  440:             /*
  441:              *   2.11 End-of-Line Handling
  442:              *   the literal two-character sequence "#xD#xA" or a standalone
  443:              *   literal #xD, an XML processor must pass to the application
  444:              *   the single character #xA.
  445:              */
  446:             if (*(ctxt->input->cur) == '\n') {
  447:                 ctxt->input->line++; ctxt->input->col = 1;
  448:             } else
  449:                 ctxt->input->col++;
  450: 
  451:             /*
  452:              * We are supposed to handle UTF8, check it's valid
  453:              * From rfc2044: encoding of the Unicode values on UTF-8:
  454:              *
  455:              * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
  456:              * 0000 0000-0000 007F   0xxxxxxx
  457:              * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
  458:              * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
  459:              *
  460:              * Check for the 0x110000 limit too
  461:              */
  462:             cur = ctxt->input->cur;
  463: 
  464:             c = *cur;
  465:             if (c & 0x80) {
  466: 	        if (c == 0xC0)
  467: 		    goto encoding_error;
  468:                 if (cur[1] == 0) {
  469:                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  470:                     cur = ctxt->input->cur;
  471:                 }
  472:                 if ((cur[1] & 0xc0) != 0x80)
  473:                     goto encoding_error;
  474:                 if ((c & 0xe0) == 0xe0) {
  475:                     unsigned int val;
  476: 
  477:                     if (cur[2] == 0) {
  478:                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  479:                         cur = ctxt->input->cur;
  480:                     }
  481:                     if ((cur[2] & 0xc0) != 0x80)
  482:                         goto encoding_error;
  483:                     if ((c & 0xf0) == 0xf0) {
  484:                         if (cur[3] == 0) {
  485:                             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  486:                             cur = ctxt->input->cur;
  487:                         }
  488:                         if (((c & 0xf8) != 0xf0) ||
  489:                             ((cur[3] & 0xc0) != 0x80))
  490:                             goto encoding_error;
  491:                         /* 4-byte code */
  492:                         ctxt->input->cur += 4;
  493:                         val = (cur[0] & 0x7) << 18;
  494:                         val |= (cur[1] & 0x3f) << 12;
  495:                         val |= (cur[2] & 0x3f) << 6;
  496:                         val |= cur[3] & 0x3f;
  497:                     } else {
  498:                         /* 3-byte code */
  499:                         ctxt->input->cur += 3;
  500:                         val = (cur[0] & 0xf) << 12;
  501:                         val |= (cur[1] & 0x3f) << 6;
  502:                         val |= cur[2] & 0x3f;
  503:                     }
  504:                     if (((val > 0xd7ff) && (val < 0xe000)) ||
  505:                         ((val > 0xfffd) && (val < 0x10000)) ||
  506:                         (val >= 0x110000)) {
  507: 			xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
  508: 					  "Char 0x%X out of allowed range\n",
  509: 					  val);
  510:                     }
  511:                 } else
  512:                     /* 2-byte code */
  513:                     ctxt->input->cur += 2;
  514:             } else
  515:                 /* 1-byte code */
  516:                 ctxt->input->cur++;
  517: 
  518:             ctxt->nbChars++;
  519:             if (*ctxt->input->cur == 0)
  520:                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  521:         }
  522:     } else {
  523:         /*
  524:          * Assume it's a fixed length encoding (1) with
  525:          * a compatible encoding for the ASCII set, since
  526:          * XML constructs only use < 128 chars
  527:          */
  528: 
  529:         if (*(ctxt->input->cur) == '\n') {
  530:             ctxt->input->line++; ctxt->input->col = 1;
  531:         } else
  532:             ctxt->input->col++;
  533:         ctxt->input->cur++;
  534:         ctxt->nbChars++;
  535:         if (*ctxt->input->cur == 0)
  536:             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  537:     }
  538:     if ((*ctxt->input->cur == '%') && (!ctxt->html))
  539:         xmlParserHandlePEReference(ctxt);
  540:     if ((*ctxt->input->cur == 0) &&
  541:         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
  542:         xmlPopInput(ctxt);
  543:     return;
  544: encoding_error:
  545:     /*
  546:      * If we detect an UTF8 error that probably mean that the
  547:      * input encoding didn't get properly advertised in the
  548:      * declaration header. Report the error and switch the encoding
  549:      * to ISO-Latin-1 (if you don't like this policy, just declare the
  550:      * encoding !)
  551:      */
  552:     if ((ctxt == NULL) || (ctxt->input == NULL) ||
  553:         (ctxt->input->end - ctxt->input->cur < 4)) {
  554: 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  555: 		     "Input is not proper UTF-8, indicate encoding !\n",
  556: 		     NULL, NULL);
  557:     } else {
  558:         char buffer[150];
  559: 
  560: 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  561: 			ctxt->input->cur[0], ctxt->input->cur[1],
  562: 			ctxt->input->cur[2], ctxt->input->cur[3]);
  563: 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  564: 		     "Input is not proper UTF-8, indicate encoding !\n%s",
  565: 		     BAD_CAST buffer, NULL);
  566:     }
  567:     ctxt->charset = XML_CHAR_ENCODING_8859_1;
  568:     ctxt->input->cur++;
  569:     return;
  570: }
  571: 
  572: /**
  573:  * xmlCurrentChar:
  574:  * @ctxt:  the XML parser context
  575:  * @len:  pointer to the length of the char read
  576:  *
  577:  * The current char value, if using UTF-8 this may actually span multiple
  578:  * bytes in the input buffer. Implement the end of line normalization:
  579:  * 2.11 End-of-Line Handling
  580:  * Wherever an external parsed entity or the literal entity value
  581:  * of an internal parsed entity contains either the literal two-character
  582:  * sequence "#xD#xA" or a standalone literal #xD, an XML processor
  583:  * must pass to the application the single character #xA.
  584:  * This behavior can conveniently be produced by normalizing all
  585:  * line breaks to #xA on input, before parsing.)
  586:  *
  587:  * Returns the current char value and its length
  588:  */
  589: 
  590: int
  591: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
  592:     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
  593:     if (ctxt->instate == XML_PARSER_EOF)
  594: 	return(0);
  595: 
  596:     if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
  597: 	    *len = 1;
  598: 	    return((int) *ctxt->input->cur);
  599:     }
  600:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
  601: 	/*
  602: 	 * We are supposed to handle UTF8, check it's valid
  603: 	 * From rfc2044: encoding of the Unicode values on UTF-8:
  604: 	 *
  605: 	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
  606: 	 * 0000 0000-0000 007F   0xxxxxxx
  607: 	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
  608: 	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
  609: 	 *
  610: 	 * Check for the 0x110000 limit too
  611: 	 */
  612: 	const unsigned char *cur = ctxt->input->cur;
  613: 	unsigned char c;
  614: 	unsigned int val;
  615: 
  616: 	c = *cur;
  617: 	if (c & 0x80) {
  618: 	    if (((c & 0x40) == 0) || (c == 0xC0))
  619: 		goto encoding_error;
  620: 	    if (cur[1] == 0) {
  621: 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  622:                 cur = ctxt->input->cur;
  623:             }
  624: 	    if ((cur[1] & 0xc0) != 0x80)
  625: 		goto encoding_error;
  626: 	    if ((c & 0xe0) == 0xe0) {
  627: 		if (cur[2] == 0) {
  628: 		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  629:                     cur = ctxt->input->cur;
  630:                 }
  631: 		if ((cur[2] & 0xc0) != 0x80)
  632: 		    goto encoding_error;
  633: 		if ((c & 0xf0) == 0xf0) {
  634: 		    if (cur[3] == 0) {
  635: 			xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  636:                         cur = ctxt->input->cur;
  637:                     }
  638: 		    if (((c & 0xf8) != 0xf0) ||
  639: 			((cur[3] & 0xc0) != 0x80))
  640: 			goto encoding_error;
  641: 		    /* 4-byte code */
  642: 		    *len = 4;
  643: 		    val = (cur[0] & 0x7) << 18;
  644: 		    val |= (cur[1] & 0x3f) << 12;
  645: 		    val |= (cur[2] & 0x3f) << 6;
  646: 		    val |= cur[3] & 0x3f;
  647: 		    if (val < 0x10000)
  648: 			goto encoding_error;
  649: 		} else {
  650: 		  /* 3-byte code */
  651: 		    *len = 3;
  652: 		    val = (cur[0] & 0xf) << 12;
  653: 		    val |= (cur[1] & 0x3f) << 6;
  654: 		    val |= cur[2] & 0x3f;
  655: 		    if (val < 0x800)
  656: 			goto encoding_error;
  657: 		}
  658: 	    } else {
  659: 	      /* 2-byte code */
  660: 		*len = 2;
  661: 		val = (cur[0] & 0x1f) << 6;
  662: 		val |= cur[1] & 0x3f;
  663: 		if (val < 0x80)
  664: 		    goto encoding_error;
  665: 	    }
  666: 	    if (!IS_CHAR(val)) {
  667: 	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
  668: 				  "Char 0x%X out of allowed range\n", val);
  669: 	    }
  670: 	    return(val);
  671: 	} else {
  672: 	    /* 1-byte code */
  673: 	    *len = 1;
  674: 	    if (*ctxt->input->cur == 0)
  675: 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  676: 	    if ((*ctxt->input->cur == 0) &&
  677: 	        (ctxt->input->end > ctxt->input->cur)) {
  678: 	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
  679: 				  "Char 0x0 out of allowed range\n", 0);
  680: 	    }
  681: 	    if (*ctxt->input->cur == 0xD) {
  682: 		if (ctxt->input->cur[1] == 0xA) {
  683: 		    ctxt->nbChars++;
  684: 		    ctxt->input->cur++;
  685: 		}
  686: 		return(0xA);
  687: 	    }
  688: 	    return((int) *ctxt->input->cur);
  689: 	}
  690:     }
  691:     /*
  692:      * Assume it's a fixed length encoding (1) with
  693:      * a compatible encoding for the ASCII set, since
  694:      * XML constructs only use < 128 chars
  695:      */
  696:     *len = 1;
  697:     if (*ctxt->input->cur == 0xD) {
  698: 	if (ctxt->input->cur[1] == 0xA) {
  699: 	    ctxt->nbChars++;
  700: 	    ctxt->input->cur++;
  701: 	}
  702: 	return(0xA);
  703:     }
  704:     return((int) *ctxt->input->cur);
  705: encoding_error:
  706:     /*
  707:      * An encoding problem may arise from a truncated input buffer
  708:      * splitting a character in the middle. In that case do not raise
  709:      * an error but return 0 to endicate an end of stream problem
  710:      */
  711:     if (ctxt->input->end - ctxt->input->cur < 4) {
  712: 	*len = 0;
  713: 	return(0);
  714:     }
  715: 
  716:     /*
  717:      * If we detect an UTF8 error that probably mean that the
  718:      * input encoding didn't get properly advertised in the
  719:      * declaration header. Report the error and switch the encoding
  720:      * to ISO-Latin-1 (if you don't like this policy, just declare the
  721:      * encoding !)
  722:      */
  723:     {
  724:         char buffer[150];
  725: 
  726: 	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  727: 			ctxt->input->cur[0], ctxt->input->cur[1],
  728: 			ctxt->input->cur[2], ctxt->input->cur[3]);
  729: 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  730: 		     "Input is not proper UTF-8, indicate encoding !\n%s",
  731: 		     BAD_CAST buffer, NULL);
  732:     }
  733:     ctxt->charset = XML_CHAR_ENCODING_8859_1;
  734:     *len = 1;
  735:     return((int) *ctxt->input->cur);
  736: }
  737: 
  738: /**
  739:  * xmlStringCurrentChar:
  740:  * @ctxt:  the XML parser context
  741:  * @cur:  pointer to the beginning of the char
  742:  * @len:  pointer to the length of the char read
  743:  *
  744:  * The current char value, if using UTF-8 this may actually span multiple
  745:  * bytes in the input buffer.
  746:  *
  747:  * Returns the current char value and its length
  748:  */
  749: 
  750: int
  751: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
  752: {
  753:     if ((len == NULL) || (cur == NULL)) return(0);
  754:     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
  755:         /*
  756:          * We are supposed to handle UTF8, check it's valid
  757:          * From rfc2044: encoding of the Unicode values on UTF-8:
  758:          *
  759:          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
  760:          * 0000 0000-0000 007F   0xxxxxxx
  761:          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
  762:          * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
  763:          *
  764:          * Check for the 0x110000 limit too
  765:          */
  766:         unsigned char c;
  767:         unsigned int val;
  768: 
  769:         c = *cur;
  770:         if (c & 0x80) {
  771:             if ((cur[1] & 0xc0) != 0x80)
  772:                 goto encoding_error;
  773:             if ((c & 0xe0) == 0xe0) {
  774: 
  775:                 if ((cur[2] & 0xc0) != 0x80)
  776:                     goto encoding_error;
  777:                 if ((c & 0xf0) == 0xf0) {
  778:                     if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
  779:                         goto encoding_error;
  780:                     /* 4-byte code */
  781:                     *len = 4;
  782:                     val = (cur[0] & 0x7) << 18;
  783:                     val |= (cur[1] & 0x3f) << 12;
  784:                     val |= (cur[2] & 0x3f) << 6;
  785:                     val |= cur[3] & 0x3f;
  786:                 } else {
  787:                     /* 3-byte code */
  788:                     *len = 3;
  789:                     val = (cur[0] & 0xf) << 12;
  790:                     val |= (cur[1] & 0x3f) << 6;
  791:                     val |= cur[2] & 0x3f;
  792:                 }
  793:             } else {
  794:                 /* 2-byte code */
  795:                 *len = 2;
  796:                 val = (cur[0] & 0x1f) << 6;
  797:                 val |= cur[1] & 0x3f;
  798:             }
  799:             if (!IS_CHAR(val)) {
  800: 	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
  801: 				  "Char 0x%X out of allowed range\n", val);
  802:             }
  803:             return (val);
  804:         } else {
  805:             /* 1-byte code */
  806:             *len = 1;
  807:             return ((int) *cur);
  808:         }
  809:     }
  810:     /*
  811:      * Assume it's a fixed length encoding (1) with
  812:      * a compatible encoding for the ASCII set, since
  813:      * XML constructs only use < 128 chars
  814:      */
  815:     *len = 1;
  816:     return ((int) *cur);
  817: encoding_error:
  818: 
  819:     /*
  820:      * An encoding problem may arise from a truncated input buffer
  821:      * splitting a character in the middle. In that case do not raise
  822:      * an error but return 0 to endicate an end of stream problem
  823:      */
  824:     if ((ctxt == NULL) || (ctxt->input == NULL) ||
  825:         (ctxt->input->end - ctxt->input->cur < 4)) {
  826: 	*len = 0;
  827: 	return(0);
  828:     }
  829:     /*
  830:      * If we detect an UTF8 error that probably mean that the
  831:      * input encoding didn't get properly advertised in the
  832:      * declaration header. Report the error and switch the encoding
  833:      * to ISO-Latin-1 (if you don't like this policy, just declare the
  834:      * encoding !)
  835:      */
  836:     {
  837:         char buffer[150];
  838: 
  839: 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  840: 			ctxt->input->cur[0], ctxt->input->cur[1],
  841: 			ctxt->input->cur[2], ctxt->input->cur[3]);
  842: 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  843: 		     "Input is not proper UTF-8, indicate encoding !\n%s",
  844: 		     BAD_CAST buffer, NULL);
  845:     }
  846:     *len = 1;
  847:     return ((int) *cur);
  848: }
  849: 
  850: /**
  851:  * xmlCopyCharMultiByte:
  852:  * @out:  pointer to an array of xmlChar
  853:  * @val:  the char value
  854:  *
  855:  * append the char value in the array
  856:  *
  857:  * Returns the number of xmlChar written
  858:  */
  859: int
  860: xmlCopyCharMultiByte(xmlChar *out, int val) {
  861:     if (out == NULL) return(0);
  862:     /*
  863:      * We are supposed to handle UTF8, check it's valid
  864:      * From rfc2044: encoding of the Unicode values on UTF-8:
  865:      *
  866:      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
  867:      * 0000 0000-0000 007F   0xxxxxxx
  868:      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
  869:      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
  870:      */
  871:     if  (val >= 0x80) {
  872: 	xmlChar *savedout = out;
  873: 	int bits;
  874: 	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
  875: 	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
  876: 	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
  877: 	else {
  878: 	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
  879: 		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
  880: 			      val);
  881: 	    return(0);
  882: 	}
  883: 	for ( ; bits >= 0; bits-= 6)
  884: 	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
  885: 	return (out - savedout);
  886:     }
  887:     *out = (xmlChar) val;
  888:     return 1;
  889: }
  890: 
  891: /**
  892:  * xmlCopyChar:
  893:  * @len:  Ignored, compatibility
  894:  * @out:  pointer to an array of xmlChar
  895:  * @val:  the char value
  896:  *
  897:  * append the char value in the array
  898:  *
  899:  * Returns the number of xmlChar written
  900:  */
  901: 
  902: int
  903: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
  904:     if (out == NULL) return(0);
  905:     /* the len parameter is ignored */
  906:     if  (val >= 0x80) {
  907: 	return(xmlCopyCharMultiByte (out, val));
  908:     }
  909:     *out = (xmlChar) val;
  910:     return 1;
  911: }
  912: 
  913: /************************************************************************
  914:  *									*
  915:  *		Commodity functions to switch encodings			*
  916:  *									*
  917:  ************************************************************************/
  918: 
  919: static int
  920: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
  921:                        xmlCharEncodingHandlerPtr handler, int len);
  922: static int
  923: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
  924:                           xmlCharEncodingHandlerPtr handler, int len);
  925: /**
  926:  * xmlSwitchEncoding:
  927:  * @ctxt:  the parser context
  928:  * @enc:  the encoding value (number)
  929:  *
  930:  * change the input functions when discovering the character encoding
  931:  * of a given entity.
  932:  *
  933:  * Returns 0 in case of success, -1 otherwise
  934:  */
  935: int
  936: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
  937: {
  938:     xmlCharEncodingHandlerPtr handler;
  939:     int len = -1;
  940: 
  941:     if (ctxt == NULL) return(-1);
  942:     switch (enc) {
  943: 	case XML_CHAR_ENCODING_ERROR:
  944: 	    __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
  945: 	                   "encoding unknown\n", NULL, NULL);
  946: 	    return(-1);
  947: 	case XML_CHAR_ENCODING_NONE:
  948: 	    /* let's assume it's UTF-8 without the XML decl */
  949: 	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
  950: 	    return(0);
  951: 	case XML_CHAR_ENCODING_UTF8:
  952: 	    /* default encoding, no conversion should be needed */
  953: 	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
  954: 
  955: 	    /*
  956: 	     * Errata on XML-1.0 June 20 2001
  957: 	     * Specific handling of the Byte Order Mark for
  958: 	     * UTF-8
  959: 	     */
  960: 	    if ((ctxt->input != NULL) &&
  961: 		(ctxt->input->cur[0] == 0xEF) &&
  962: 		(ctxt->input->cur[1] == 0xBB) &&
  963: 		(ctxt->input->cur[2] == 0xBF)) {
  964: 		ctxt->input->cur += 3;
  965: 	    }
  966: 	    return(0);
  967:     case XML_CHAR_ENCODING_UTF16LE:
  968:     case XML_CHAR_ENCODING_UTF16BE:
  969:         /*The raw input characters are encoded
  970:          *in UTF-16. As we expect this function
  971:          *to be called after xmlCharEncInFunc, we expect
  972:          *ctxt->input->cur to contain UTF-8 encoded characters.
  973:          *So the raw UTF16 Byte Order Mark
  974:          *has also been converted into
  975:          *an UTF-8 BOM. Let's skip that BOM.
  976:          */
  977:         if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
  978:             (ctxt->input->cur[0] == 0xEF) &&
  979:             (ctxt->input->cur[1] == 0xBB) &&
  980:             (ctxt->input->cur[2] == 0xBF)) {
  981:             ctxt->input->cur += 3;
  982:         }
  983:         len = 90;
  984: 	break;
  985:     case XML_CHAR_ENCODING_UCS2:
  986:         len = 90;
  987: 	break;
  988:     case XML_CHAR_ENCODING_UCS4BE:
  989:     case XML_CHAR_ENCODING_UCS4LE:
  990:     case XML_CHAR_ENCODING_UCS4_2143:
  991:     case XML_CHAR_ENCODING_UCS4_3412:
  992:         len = 180;
  993: 	break;
  994:     case XML_CHAR_ENCODING_EBCDIC:
  995:     case XML_CHAR_ENCODING_8859_1:
  996:     case XML_CHAR_ENCODING_8859_2:
  997:     case XML_CHAR_ENCODING_8859_3:
  998:     case XML_CHAR_ENCODING_8859_4:
  999:     case XML_CHAR_ENCODING_8859_5:
 1000:     case XML_CHAR_ENCODING_8859_6:
 1001:     case XML_CHAR_ENCODING_8859_7:
 1002:     case XML_CHAR_ENCODING_8859_8:
 1003:     case XML_CHAR_ENCODING_8859_9:
 1004:     case XML_CHAR_ENCODING_ASCII:
 1005:     case XML_CHAR_ENCODING_2022_JP:
 1006:     case XML_CHAR_ENCODING_SHIFT_JIS:
 1007:     case XML_CHAR_ENCODING_EUC_JP:
 1008:         len = 45;
 1009: 	break;
 1010:     }
 1011:     handler = xmlGetCharEncodingHandler(enc);
 1012:     if (handler == NULL) {
 1013: 	/*
 1014: 	 * Default handlers.
 1015: 	 */
 1016: 	switch (enc) {
 1017: 	    case XML_CHAR_ENCODING_ASCII:
 1018: 		/* default encoding, no conversion should be needed */
 1019: 		ctxt->charset = XML_CHAR_ENCODING_UTF8;
 1020: 		return(0);
 1021: 	    case XML_CHAR_ENCODING_UTF16LE:
 1022: 		break;
 1023: 	    case XML_CHAR_ENCODING_UTF16BE:
 1024: 		break;
 1025: 	    case XML_CHAR_ENCODING_UCS4LE:
 1026: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1027: 			       "encoding not supported %s\n",
 1028: 			       BAD_CAST "USC4 little endian", NULL);
 1029: 		break;
 1030: 	    case XML_CHAR_ENCODING_UCS4BE:
 1031: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1032: 			       "encoding not supported %s\n",
 1033: 			       BAD_CAST "USC4 big endian", NULL);
 1034: 		break;
 1035: 	    case XML_CHAR_ENCODING_EBCDIC:
 1036: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1037: 			       "encoding not supported %s\n",
 1038: 			       BAD_CAST "EBCDIC", NULL);
 1039: 		break;
 1040: 	    case XML_CHAR_ENCODING_UCS4_2143:
 1041: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1042: 			       "encoding not supported %s\n",
 1043: 			       BAD_CAST "UCS4 2143", NULL);
 1044: 		break;
 1045: 	    case XML_CHAR_ENCODING_UCS4_3412:
 1046: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1047: 			       "encoding not supported %s\n",
 1048: 			       BAD_CAST "UCS4 3412", NULL);
 1049: 		break;
 1050: 	    case XML_CHAR_ENCODING_UCS2:
 1051: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1052: 			       "encoding not supported %s\n",
 1053: 			       BAD_CAST "UCS2", NULL);
 1054: 		break;
 1055: 	    case XML_CHAR_ENCODING_8859_1:
 1056: 	    case XML_CHAR_ENCODING_8859_2:
 1057: 	    case XML_CHAR_ENCODING_8859_3:
 1058: 	    case XML_CHAR_ENCODING_8859_4:
 1059: 	    case XML_CHAR_ENCODING_8859_5:
 1060: 	    case XML_CHAR_ENCODING_8859_6:
 1061: 	    case XML_CHAR_ENCODING_8859_7:
 1062: 	    case XML_CHAR_ENCODING_8859_8:
 1063: 	    case XML_CHAR_ENCODING_8859_9:
 1064: 		/*
 1065: 		 * We used to keep the internal content in the
 1066: 		 * document encoding however this turns being unmaintainable
 1067: 		 * So xmlGetCharEncodingHandler() will return non-null
 1068: 		 * values for this now.
 1069: 		 */
 1070: 		if ((ctxt->inputNr == 1) &&
 1071: 		    (ctxt->encoding == NULL) &&
 1072: 		    (ctxt->input != NULL) &&
 1073: 		    (ctxt->input->encoding != NULL)) {
 1074: 		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
 1075: 		}
 1076: 		ctxt->charset = enc;
 1077: 		return(0);
 1078: 	    case XML_CHAR_ENCODING_2022_JP:
 1079: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1080: 			       "encoding not supported %s\n",
 1081: 			       BAD_CAST "ISO-2022-JP", NULL);
 1082: 		break;
 1083: 	    case XML_CHAR_ENCODING_SHIFT_JIS:
 1084: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1085: 			       "encoding not supported %s\n",
 1086: 			       BAD_CAST "Shift_JIS", NULL);
 1087: 		break;
 1088: 	    case XML_CHAR_ENCODING_EUC_JP:
 1089: 		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
 1090: 			       "encoding not supported %s\n",
 1091: 			       BAD_CAST "EUC-JP", NULL);
 1092: 		break;
 1093: 	    default:
 1094: 	        break;
 1095: 	}
 1096:     }
 1097:     if (handler == NULL)
 1098: 	return(-1);
 1099:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
 1100:     return(xmlSwitchToEncodingInt(ctxt, handler, len));
 1101: }
 1102: 
 1103: /**
 1104:  * xmlSwitchInputEncoding:
 1105:  * @ctxt:  the parser context
 1106:  * @input:  the input stream
 1107:  * @handler:  the encoding handler
 1108:  * @len:  the number of bytes to convert for the first line or -1
 1109:  *
 1110:  * change the input functions when discovering the character encoding
 1111:  * of a given entity.
 1112:  *
 1113:  * Returns 0 in case of success, -1 otherwise
 1114:  */
 1115: static int
 1116: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
 1117:                           xmlCharEncodingHandlerPtr handler, int len)
 1118: {
 1119:     int nbchars;
 1120: 
 1121:     if (handler == NULL)
 1122:         return (-1);
 1123:     if (input == NULL)
 1124:         return (-1);
 1125:     if (input->buf != NULL) {
 1126:         if (input->buf->encoder != NULL) {
 1127:             /*
 1128:              * Check in case the auto encoding detetection triggered
 1129:              * in already.
 1130:              */
 1131:             if (input->buf->encoder == handler)
 1132:                 return (0);
 1133: 
 1134:             /*
 1135:              * "UTF-16" can be used for both LE and BE
 1136:              if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
 1137:              BAD_CAST "UTF-16", 6)) &&
 1138:              (!xmlStrncmp(BAD_CAST handler->name,
 1139:              BAD_CAST "UTF-16", 6))) {
 1140:              return(0);
 1141:              }
 1142:              */
 1143: 
 1144:             /*
 1145:              * Note: this is a bit dangerous, but that's what it
 1146:              * takes to use nearly compatible signature for different
 1147:              * encodings.
 1148:              */
 1149:             xmlCharEncCloseFunc(input->buf->encoder);
 1150:             input->buf->encoder = handler;
 1151:             return (0);
 1152:         }
 1153:         input->buf->encoder = handler;
 1154: 
 1155:         /*
 1156:          * Is there already some content down the pipe to convert ?
 1157:          */
 1158:         if (xmlBufIsEmpty(input->buf->buffer) == 0) {
 1159:             int processed;
 1160: 	    unsigned int use;
 1161: 
 1162:             /*
 1163:              * Specific handling of the Byte Order Mark for
 1164:              * UTF-16
 1165:              */
 1166:             if ((handler->name != NULL) &&
 1167:                 (!strcmp(handler->name, "UTF-16LE") ||
 1168:                  !strcmp(handler->name, "UTF-16")) &&
 1169:                 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
 1170:                 input->cur += 2;
 1171:             }
 1172:             if ((handler->name != NULL) &&
 1173:                 (!strcmp(handler->name, "UTF-16BE")) &&
 1174:                 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
 1175:                 input->cur += 2;
 1176:             }
 1177:             /*
 1178:              * Errata on XML-1.0 June 20 2001
 1179:              * Specific handling of the Byte Order Mark for
 1180:              * UTF-8
 1181:              */
 1182:             if ((handler->name != NULL) &&
 1183:                 (!strcmp(handler->name, "UTF-8")) &&
 1184:                 (input->cur[0] == 0xEF) &&
 1185:                 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
 1186:                 input->cur += 3;
 1187:             }
 1188: 
 1189:             /*
 1190:              * Shrink the current input buffer.
 1191:              * Move it as the raw buffer and create a new input buffer
 1192:              */
 1193:             processed = input->cur - input->base;
 1194:             xmlBufShrink(input->buf->buffer, processed);
 1195:             input->buf->raw = input->buf->buffer;
 1196:             input->buf->buffer = xmlBufCreate();
 1197: 	    input->buf->rawconsumed = processed;
 1198: 	    use = xmlBufUse(input->buf->raw);
 1199: 
 1200:             if (ctxt->html) {
 1201:                 /*
 1202:                  * convert as much as possible of the buffer
 1203:                  */
 1204:                 nbchars = xmlCharEncInput(input->buf, 1);
 1205:             } else {
 1206:                 /*
 1207:                  * convert just enough to get
 1208:                  * '<?xml version="1.0" encoding="xxx"?>'
 1209:                  * parsed with the autodetected encoding
 1210:                  * into the parser reading buffer.
 1211:                  */
 1212:                 nbchars = xmlCharEncFirstLineInput(input->buf, len);
 1213:             }
 1214:             if (nbchars < 0) {
 1215:                 xmlErrInternal(ctxt,
 1216:                                "switching encoding: encoder error\n",
 1217:                                NULL);
 1218:                 return (-1);
 1219:             }
 1220: 	    input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
 1221:             xmlBufResetInput(input->buf->buffer, input);
 1222:         }
 1223:         return (0);
 1224:     } else if (input->length == 0) {
 1225: 	/*
 1226: 	 * When parsing a static memory array one must know the
 1227: 	 * size to be able to convert the buffer.
 1228: 	 */
 1229: 	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
 1230: 	return (-1);
 1231:     }
 1232:     return (0);
 1233: }
 1234: 
 1235: /**
 1236:  * xmlSwitchInputEncoding:
 1237:  * @ctxt:  the parser context
 1238:  * @input:  the input stream
 1239:  * @handler:  the encoding handler
 1240:  *
 1241:  * change the input functions when discovering the character encoding
 1242:  * of a given entity.
 1243:  *
 1244:  * Returns 0 in case of success, -1 otherwise
 1245:  */
 1246: int
 1247: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
 1248:                           xmlCharEncodingHandlerPtr handler) {
 1249:     return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
 1250: }
 1251: 
 1252: /**
 1253:  * xmlSwitchToEncodingInt:
 1254:  * @ctxt:  the parser context
 1255:  * @handler:  the encoding handler
 1256:  * @len: the length to convert or -1
 1257:  *
 1258:  * change the input functions when discovering the character encoding
 1259:  * of a given entity, and convert only @len bytes of the output, this
 1260:  * is needed on auto detect to allows any declared encoding later to
 1261:  * convert the actual content after the xmlDecl
 1262:  *
 1263:  * Returns 0 in case of success, -1 otherwise
 1264:  */
 1265: static int
 1266: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
 1267:                        xmlCharEncodingHandlerPtr handler, int len) {
 1268:     int ret = 0;
 1269: 
 1270:     if (handler != NULL) {
 1271:         if (ctxt->input != NULL) {
 1272: 	    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
 1273: 	} else {
 1274: 	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
 1275: 	                   NULL);
 1276: 	    return(-1);
 1277: 	}
 1278: 	/*
 1279: 	 * The parsing is now done in UTF8 natively
 1280: 	 */
 1281: 	ctxt->charset = XML_CHAR_ENCODING_UTF8;
 1282:     } else
 1283: 	return(-1);
 1284:     return(ret);
 1285: }
 1286: 
 1287: /**
 1288:  * xmlSwitchToEncoding:
 1289:  * @ctxt:  the parser context
 1290:  * @handler:  the encoding handler
 1291:  *
 1292:  * change the input functions when discovering the character encoding
 1293:  * of a given entity.
 1294:  *
 1295:  * Returns 0 in case of success, -1 otherwise
 1296:  */
 1297: int
 1298: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
 1299: {
 1300:     return (xmlSwitchToEncodingInt(ctxt, handler, -1));
 1301: }
 1302: 
 1303: /************************************************************************
 1304:  *									*
 1305:  *	Commodity functions to handle entities processing		*
 1306:  *									*
 1307:  ************************************************************************/
 1308: 
 1309: /**
 1310:  * xmlFreeInputStream:
 1311:  * @input:  an xmlParserInputPtr
 1312:  *
 1313:  * Free up an input stream.
 1314:  */
 1315: void
 1316: xmlFreeInputStream(xmlParserInputPtr input) {
 1317:     if (input == NULL) return;
 1318: 
 1319:     if (input->filename != NULL) xmlFree((char *) input->filename);
 1320:     if (input->directory != NULL) xmlFree((char *) input->directory);
 1321:     if (input->encoding != NULL) xmlFree((char *) input->encoding);
 1322:     if (input->version != NULL) xmlFree((char *) input->version);
 1323:     if ((input->free != NULL) && (input->base != NULL))
 1324:         input->free((xmlChar *) input->base);
 1325:     if (input->buf != NULL)
 1326:         xmlFreeParserInputBuffer(input->buf);
 1327:     xmlFree(input);
 1328: }
 1329: 
 1330: /**
 1331:  * xmlNewInputStream:
 1332:  * @ctxt:  an XML parser context
 1333:  *
 1334:  * Create a new input stream structure.
 1335:  *
 1336:  * Returns the new input stream or NULL
 1337:  */
 1338: xmlParserInputPtr
 1339: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
 1340:     xmlParserInputPtr input;
 1341: 
 1342:     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
 1343:     if (input == NULL) {
 1344:         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
 1345: 	return(NULL);
 1346:     }
 1347:     memset(input, 0, sizeof(xmlParserInput));
 1348:     input->line = 1;
 1349:     input->col = 1;
 1350:     input->standalone = -1;
 1351: 
 1352:     /*
 1353:      * If the context is NULL the id cannot be initialized, but that
 1354:      * should not happen while parsing which is the situation where
 1355:      * the id is actually needed.
 1356:      */
 1357:     if (ctxt != NULL)
 1358:         input->id = ctxt->input_id++;
 1359: 
 1360:     return(input);
 1361: }
 1362: 
 1363: /**
 1364:  * xmlNewIOInputStream:
 1365:  * @ctxt:  an XML parser context
 1366:  * @input:  an I/O Input
 1367:  * @enc:  the charset encoding if known
 1368:  *
 1369:  * Create a new input stream structure encapsulating the @input into
 1370:  * a stream suitable for the parser.
 1371:  *
 1372:  * Returns the new input stream or NULL
 1373:  */
 1374: xmlParserInputPtr
 1375: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
 1376: 	            xmlCharEncoding enc) {
 1377:     xmlParserInputPtr inputStream;
 1378: 
 1379:     if (input == NULL) return(NULL);
 1380:     if (xmlParserDebugEntities)
 1381: 	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
 1382:     inputStream = xmlNewInputStream(ctxt);
 1383:     if (inputStream == NULL) {
 1384: 	return(NULL);
 1385:     }
 1386:     inputStream->filename = NULL;
 1387:     inputStream->buf = input;
 1388:     xmlBufResetInput(inputStream->buf->buffer, inputStream);
 1389: 
 1390:     if (enc != XML_CHAR_ENCODING_NONE) {
 1391:         xmlSwitchEncoding(ctxt, enc);
 1392:     }
 1393: 
 1394:     return(inputStream);
 1395: }
 1396: 
 1397: /**
 1398:  * xmlNewEntityInputStream:
 1399:  * @ctxt:  an XML parser context
 1400:  * @entity:  an Entity pointer
 1401:  *
 1402:  * Create a new input stream based on an xmlEntityPtr
 1403:  *
 1404:  * Returns the new input stream or NULL
 1405:  */
 1406: xmlParserInputPtr
 1407: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
 1408:     xmlParserInputPtr input;
 1409: 
 1410:     if (entity == NULL) {
 1411:         xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
 1412: 	               NULL);
 1413: 	return(NULL);
 1414:     }
 1415:     if (xmlParserDebugEntities)
 1416: 	xmlGenericError(xmlGenericErrorContext,
 1417: 		"new input from entity: %s\n", entity->name);
 1418:     if (entity->content == NULL) {
 1419: 	switch (entity->etype) {
 1420:             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
 1421: 	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
 1422: 		               entity->name);
 1423:                 break;
 1424:             case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
 1425:             case XML_EXTERNAL_PARAMETER_ENTITY:
 1426: 		return(xmlLoadExternalEntity((char *) entity->URI,
 1427: 		       (char *) entity->ExternalID, ctxt));
 1428:             case XML_INTERNAL_GENERAL_ENTITY:
 1429: 	        xmlErrInternal(ctxt,
 1430: 		      "Internal entity %s without content !\n",
 1431: 		               entity->name);
 1432:                 break;
 1433:             case XML_INTERNAL_PARAMETER_ENTITY:
 1434: 	        xmlErrInternal(ctxt,
 1435: 		      "Internal parameter entity %s without content !\n",
 1436: 		               entity->name);
 1437:                 break;
 1438:             case XML_INTERNAL_PREDEFINED_ENTITY:
 1439: 	        xmlErrInternal(ctxt,
 1440: 		      "Predefined entity %s without content !\n",
 1441: 		               entity->name);
 1442:                 break;
 1443: 	}
 1444: 	return(NULL);
 1445:     }
 1446:     input = xmlNewInputStream(ctxt);
 1447:     if (input == NULL) {
 1448: 	return(NULL);
 1449:     }
 1450:     if (entity->URI != NULL)
 1451: 	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
 1452:     input->base = entity->content;
 1453:     input->cur = entity->content;
 1454:     input->length = entity->length;
 1455:     input->end = &entity->content[input->length];
 1456:     return(input);
 1457: }
 1458: 
 1459: /**
 1460:  * xmlNewStringInputStream:
 1461:  * @ctxt:  an XML parser context
 1462:  * @buffer:  an memory buffer
 1463:  *
 1464:  * Create a new input stream based on a memory buffer.
 1465:  * Returns the new input stream
 1466:  */
 1467: xmlParserInputPtr
 1468: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
 1469:     xmlParserInputPtr input;
 1470: 
 1471:     if (buffer == NULL) {
 1472:         xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
 1473: 	               NULL);
 1474: 	return(NULL);
 1475:     }
 1476:     if (xmlParserDebugEntities)
 1477: 	xmlGenericError(xmlGenericErrorContext,
 1478: 		"new fixed input: %.30s\n", buffer);
 1479:     input = xmlNewInputStream(ctxt);
 1480:     if (input == NULL) {
 1481:         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
 1482: 	return(NULL);
 1483:     }
 1484:     input->base = buffer;
 1485:     input->cur = buffer;
 1486:     input->length = xmlStrlen(buffer);
 1487:     input->end = &buffer[input->length];
 1488:     return(input);
 1489: }
 1490: 
 1491: /**
 1492:  * xmlNewInputFromFile:
 1493:  * @ctxt:  an XML parser context
 1494:  * @filename:  the filename to use as entity
 1495:  *
 1496:  * Create a new input stream based on a file or an URL.
 1497:  *
 1498:  * Returns the new input stream or NULL in case of error
 1499:  */
 1500: xmlParserInputPtr
 1501: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
 1502:     xmlParserInputBufferPtr buf;
 1503:     xmlParserInputPtr inputStream;
 1504:     char *directory = NULL;
 1505:     xmlChar *URI = NULL;
 1506: 
 1507:     if (xmlParserDebugEntities)
 1508: 	xmlGenericError(xmlGenericErrorContext,
 1509: 		"new input from file: %s\n", filename);
 1510:     if (ctxt == NULL) return(NULL);
 1511:     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
 1512:     if (buf == NULL) {
 1513: 	if (filename == NULL)
 1514: 	    __xmlLoaderErr(ctxt,
 1515: 	                   "failed to load external entity: NULL filename \n",
 1516: 			   NULL);
 1517: 	else
 1518: 	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
 1519: 			   (const char *) filename);
 1520: 	return(NULL);
 1521:     }
 1522: 
 1523:     inputStream = xmlNewInputStream(ctxt);
 1524:     if (inputStream == NULL)
 1525: 	return(NULL);
 1526: 
 1527:     inputStream->buf = buf;
 1528:     inputStream = xmlCheckHTTPInput(ctxt, inputStream);
 1529:     if (inputStream == NULL)
 1530:         return(NULL);
 1531: 
 1532:     if (inputStream->filename == NULL)
 1533: 	URI = xmlStrdup((xmlChar *) filename);
 1534:     else
 1535: 	URI = xmlStrdup((xmlChar *) inputStream->filename);
 1536:     directory = xmlParserGetDirectory((const char *) URI);
 1537:     if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
 1538:     inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
 1539:     if (URI != NULL) xmlFree((char *) URI);
 1540:     inputStream->directory = directory;
 1541: 
 1542:     xmlBufResetInput(inputStream->buf->buffer, inputStream);
 1543:     if ((ctxt->directory == NULL) && (directory != NULL))
 1544:         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
 1545:     return(inputStream);
 1546: }
 1547: 
 1548: /************************************************************************
 1549:  *									*
 1550:  *		Commodity functions to handle parser contexts		*
 1551:  *									*
 1552:  ************************************************************************/
 1553: 
 1554: /**
 1555:  * xmlInitParserCtxt:
 1556:  * @ctxt:  an XML parser context
 1557:  *
 1558:  * Initialize a parser context
 1559:  *
 1560:  * Returns 0 in case of success and -1 in case of error
 1561:  */
 1562: 
 1563: int
 1564: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
 1565: {
 1566:     xmlParserInputPtr input;
 1567: 
 1568:     if(ctxt==NULL) {
 1569:         xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
 1570:         return(-1);
 1571:     }
 1572: 
 1573:     xmlDefaultSAXHandlerInit();
 1574: 
 1575:     if (ctxt->dict == NULL)
 1576: 	ctxt->dict = xmlDictCreate();
 1577:     if (ctxt->dict == NULL) {
 1578:         xmlErrMemory(NULL, "cannot initialize parser context\n");
 1579: 	return(-1);
 1580:     }
 1581:     xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
 1582: 
 1583:     if (ctxt->sax == NULL)
 1584: 	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
 1585:     if (ctxt->sax == NULL) {
 1586:         xmlErrMemory(NULL, "cannot initialize parser context\n");
 1587: 	return(-1);
 1588:     }
 1589:     else
 1590:         xmlSAXVersion(ctxt->sax, 2);
 1591: 
 1592:     ctxt->maxatts = 0;
 1593:     ctxt->atts = NULL;
 1594:     /* Allocate the Input stack */
 1595:     if (ctxt->inputTab == NULL) {
 1596: 	ctxt->inputTab = (xmlParserInputPtr *)
 1597: 		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
 1598: 	ctxt->inputMax = 5;
 1599:     }
 1600:     if (ctxt->inputTab == NULL) {
 1601:         xmlErrMemory(NULL, "cannot initialize parser context\n");
 1602: 	ctxt->inputNr = 0;
 1603: 	ctxt->inputMax = 0;
 1604: 	ctxt->input = NULL;
 1605: 	return(-1);
 1606:     }
 1607:     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
 1608:         xmlFreeInputStream(input);
 1609:     }
 1610:     ctxt->inputNr = 0;
 1611:     ctxt->input = NULL;
 1612: 
 1613:     ctxt->version = NULL;
 1614:     ctxt->encoding = NULL;
 1615:     ctxt->standalone = -1;
 1616:     ctxt->hasExternalSubset = 0;
 1617:     ctxt->hasPErefs = 0;
 1618:     ctxt->html = 0;
 1619:     ctxt->external = 0;
 1620:     ctxt->instate = XML_PARSER_START;
 1621:     ctxt->token = 0;
 1622:     ctxt->directory = NULL;
 1623: 
 1624:     /* Allocate the Node stack */
 1625:     if (ctxt->nodeTab == NULL) {
 1626: 	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
 1627: 	ctxt->nodeMax = 10;
 1628:     }
 1629:     if (ctxt->nodeTab == NULL) {
 1630:         xmlErrMemory(NULL, "cannot initialize parser context\n");
 1631: 	ctxt->nodeNr = 0;
 1632: 	ctxt->nodeMax = 0;
 1633: 	ctxt->node = NULL;
 1634: 	ctxt->inputNr = 0;
 1635: 	ctxt->inputMax = 0;
 1636: 	ctxt->input = NULL;
 1637: 	return(-1);
 1638:     }
 1639:     ctxt->nodeNr = 0;
 1640:     ctxt->node = NULL;
 1641: 
 1642:     /* Allocate the Name stack */
 1643:     if (ctxt->nameTab == NULL) {
 1644: 	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
 1645: 	ctxt->nameMax = 10;
 1646:     }
 1647:     if (ctxt->nameTab == NULL) {
 1648:         xmlErrMemory(NULL, "cannot initialize parser context\n");
 1649: 	ctxt->nodeNr = 0;
 1650: 	ctxt->nodeMax = 0;
 1651: 	ctxt->node = NULL;
 1652: 	ctxt->inputNr = 0;
 1653: 	ctxt->inputMax = 0;
 1654: 	ctxt->input = NULL;
 1655: 	ctxt->nameNr = 0;
 1656: 	ctxt->nameMax = 0;
 1657: 	ctxt->name = NULL;
 1658: 	return(-1);
 1659:     }
 1660:     ctxt->nameNr = 0;
 1661:     ctxt->name = NULL;
 1662: 
 1663:     /* Allocate the space stack */
 1664:     if (ctxt->spaceTab == NULL) {
 1665: 	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
 1666: 	ctxt->spaceMax = 10;
 1667:     }
 1668:     if (ctxt->spaceTab == NULL) {
 1669:         xmlErrMemory(NULL, "cannot initialize parser context\n");
 1670: 	ctxt->nodeNr = 0;
 1671: 	ctxt->nodeMax = 0;
 1672: 	ctxt->node = NULL;
 1673: 	ctxt->inputNr = 0;
 1674: 	ctxt->inputMax = 0;
 1675: 	ctxt->input = NULL;
 1676: 	ctxt->nameNr = 0;
 1677: 	ctxt->nameMax = 0;
 1678: 	ctxt->name = NULL;
 1679: 	ctxt->spaceNr = 0;
 1680: 	ctxt->spaceMax = 0;
 1681: 	ctxt->space = NULL;
 1682: 	return(-1);
 1683:     }
 1684:     ctxt->spaceNr = 1;
 1685:     ctxt->spaceMax = 10;
 1686:     ctxt->spaceTab[0] = -1;
 1687:     ctxt->space = &ctxt->spaceTab[0];
 1688:     ctxt->userData = ctxt;
 1689:     ctxt->myDoc = NULL;
 1690:     ctxt->wellFormed = 1;
 1691:     ctxt->nsWellFormed = 1;
 1692:     ctxt->valid = 1;
 1693:     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
 1694:     ctxt->validate = xmlDoValidityCheckingDefaultValue;
 1695:     ctxt->pedantic = xmlPedanticParserDefaultValue;
 1696:     ctxt->linenumbers = xmlLineNumbersDefaultValue;
 1697:     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
 1698:     if (ctxt->keepBlanks == 0)
 1699: 	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
 1700: 
 1701:     ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
 1702:     ctxt->vctxt.userData = ctxt;
 1703:     ctxt->vctxt.error = xmlParserValidityError;
 1704:     ctxt->vctxt.warning = xmlParserValidityWarning;
 1705:     if (ctxt->validate) {
 1706: 	if (xmlGetWarningsDefaultValue == 0)
 1707: 	    ctxt->vctxt.warning = NULL;
 1708: 	else
 1709: 	    ctxt->vctxt.warning = xmlParserValidityWarning;
 1710: 	ctxt->vctxt.nodeMax = 0;
 1711:     }
 1712:     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
 1713:     ctxt->record_info = 0;
 1714:     ctxt->nbChars = 0;
 1715:     ctxt->checkIndex = 0;
 1716:     ctxt->inSubset = 0;
 1717:     ctxt->errNo = XML_ERR_OK;
 1718:     ctxt->depth = 0;
 1719:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
 1720:     ctxt->catalogs = NULL;
 1721:     ctxt->nbentities = 0;
 1722:     ctxt->sizeentities = 0;
 1723:     ctxt->sizeentcopy = 0;
 1724:     ctxt->input_id = 1;
 1725:     xmlInitNodeInfoSeq(&ctxt->node_seq);
 1726:     return(0);
 1727: }
 1728: 
 1729: /**
 1730:  * xmlFreeParserCtxt:
 1731:  * @ctxt:  an XML parser context
 1732:  *
 1733:  * Free all the memory used by a parser context. However the parsed
 1734:  * document in ctxt->myDoc is not freed.
 1735:  */
 1736: 
 1737: void
 1738: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
 1739: {
 1740:     xmlParserInputPtr input;
 1741: 
 1742:     if (ctxt == NULL) return;
 1743: 
 1744:     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
 1745:         xmlFreeInputStream(input);
 1746:     }
 1747:     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
 1748:     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
 1749:     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
 1750:     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
 1751:     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
 1752:     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
 1753:     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
 1754:     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
 1755:     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
 1756: #ifdef LIBXML_SAX1_ENABLED
 1757:     if ((ctxt->sax != NULL) &&
 1758:         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
 1759: #else
 1760:     if (ctxt->sax != NULL)
 1761: #endif /* LIBXML_SAX1_ENABLED */
 1762:         xmlFree(ctxt->sax);
 1763:     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
 1764:     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
 1765:     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
 1766:     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
 1767:     if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
 1768:     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
 1769:     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
 1770:     if (ctxt->attsDefault != NULL)
 1771:         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
 1772:     if (ctxt->attsSpecial != NULL)
 1773:         xmlHashFree(ctxt->attsSpecial, NULL);
 1774:     if (ctxt->freeElems != NULL) {
 1775:         xmlNodePtr cur, next;
 1776: 
 1777: 	cur = ctxt->freeElems;
 1778: 	while (cur != NULL) {
 1779: 	    next = cur->next;
 1780: 	    xmlFree(cur);
 1781: 	    cur = next;
 1782: 	}
 1783:     }
 1784:     if (ctxt->freeAttrs != NULL) {
 1785:         xmlAttrPtr cur, next;
 1786: 
 1787: 	cur = ctxt->freeAttrs;
 1788: 	while (cur != NULL) {
 1789: 	    next = cur->next;
 1790: 	    xmlFree(cur);
 1791: 	    cur = next;
 1792: 	}
 1793:     }
 1794:     /*
 1795:      * cleanup the error strings
 1796:      */
 1797:     if (ctxt->lastError.message != NULL)
 1798:         xmlFree(ctxt->lastError.message);
 1799:     if (ctxt->lastError.file != NULL)
 1800:         xmlFree(ctxt->lastError.file);
 1801:     if (ctxt->lastError.str1 != NULL)
 1802:         xmlFree(ctxt->lastError.str1);
 1803:     if (ctxt->lastError.str2 != NULL)
 1804:         xmlFree(ctxt->lastError.str2);
 1805:     if (ctxt->lastError.str3 != NULL)
 1806:         xmlFree(ctxt->lastError.str3);
 1807: 
 1808: #ifdef LIBXML_CATALOG_ENABLED
 1809:     if (ctxt->catalogs != NULL)
 1810: 	xmlCatalogFreeLocal(ctxt->catalogs);
 1811: #endif
 1812:     xmlFree(ctxt);
 1813: }
 1814: 
 1815: /**
 1816:  * xmlNewParserCtxt:
 1817:  *
 1818:  * Allocate and initialize a new parser context.
 1819:  *
 1820:  * Returns the xmlParserCtxtPtr or NULL
 1821:  */
 1822: 
 1823: xmlParserCtxtPtr
 1824: xmlNewParserCtxt(void)
 1825: {
 1826:     xmlParserCtxtPtr ctxt;
 1827: 
 1828:     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
 1829:     if (ctxt == NULL) {
 1830: 	xmlErrMemory(NULL, "cannot allocate parser context\n");
 1831: 	return(NULL);
 1832:     }
 1833:     memset(ctxt, 0, sizeof(xmlParserCtxt));
 1834:     if (xmlInitParserCtxt(ctxt) < 0) {
 1835:         xmlFreeParserCtxt(ctxt);
 1836: 	return(NULL);
 1837:     }
 1838:     return(ctxt);
 1839: }
 1840: 
 1841: /************************************************************************
 1842:  *									*
 1843:  *		Handling of node information				*
 1844:  *									*
 1845:  ************************************************************************/
 1846: 
 1847: /**
 1848:  * xmlClearParserCtxt:
 1849:  * @ctxt:  an XML parser context
 1850:  *
 1851:  * Clear (release owned resources) and reinitialize a parser context
 1852:  */
 1853: 
 1854: void
 1855: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
 1856: {
 1857:   if (ctxt==NULL)
 1858:     return;
 1859:   xmlClearNodeInfoSeq(&ctxt->node_seq);
 1860:   xmlCtxtReset(ctxt);
 1861: }
 1862: 
 1863: 
 1864: /**
 1865:  * xmlParserFindNodeInfo:
 1866:  * @ctx:  an XML parser context
 1867:  * @node:  an XML node within the tree
 1868:  *
 1869:  * Find the parser node info struct for a given node
 1870:  *
 1871:  * Returns an xmlParserNodeInfo block pointer or NULL
 1872:  */
 1873: const xmlParserNodeInfo *
 1874: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
 1875: {
 1876:     unsigned long pos;
 1877: 
 1878:     if ((ctx == NULL) || (node == NULL))
 1879:         return (NULL);
 1880:     /* Find position where node should be at */
 1881:     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
 1882:     if (pos < ctx->node_seq.length
 1883:         && ctx->node_seq.buffer[pos].node == node)
 1884:         return &ctx->node_seq.buffer[pos];
 1885:     else
 1886:         return NULL;
 1887: }
 1888: 
 1889: 
 1890: /**
 1891:  * xmlInitNodeInfoSeq:
 1892:  * @seq:  a node info sequence pointer
 1893:  *
 1894:  * -- Initialize (set to initial state) node info sequence
 1895:  */
 1896: void
 1897: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
 1898: {
 1899:     if (seq == NULL)
 1900:         return;
 1901:     seq->length = 0;
 1902:     seq->maximum = 0;
 1903:     seq->buffer = NULL;
 1904: }
 1905: 
 1906: /**
 1907:  * xmlClearNodeInfoSeq:
 1908:  * @seq:  a node info sequence pointer
 1909:  *
 1910:  * -- Clear (release memory and reinitialize) node
 1911:  *   info sequence
 1912:  */
 1913: void
 1914: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
 1915: {
 1916:     if (seq == NULL)
 1917:         return;
 1918:     if (seq->buffer != NULL)
 1919:         xmlFree(seq->buffer);
 1920:     xmlInitNodeInfoSeq(seq);
 1921: }
 1922: 
 1923: /**
 1924:  * xmlParserFindNodeInfoIndex:
 1925:  * @seq:  a node info sequence pointer
 1926:  * @node:  an XML node pointer
 1927:  *
 1928:  *
 1929:  * xmlParserFindNodeInfoIndex : Find the index that the info record for
 1930:  *   the given node is or should be at in a sorted sequence
 1931:  *
 1932:  * Returns a long indicating the position of the record
 1933:  */
 1934: unsigned long
 1935: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
 1936:                            const xmlNodePtr node)
 1937: {
 1938:     unsigned long upper, lower, middle;
 1939:     int found = 0;
 1940: 
 1941:     if ((seq == NULL) || (node == NULL))
 1942:         return ((unsigned long) -1);
 1943: 
 1944:     /* Do a binary search for the key */
 1945:     lower = 1;
 1946:     upper = seq->length;
 1947:     middle = 0;
 1948:     while (lower <= upper && !found) {
 1949:         middle = lower + (upper - lower) / 2;
 1950:         if (node == seq->buffer[middle - 1].node)
 1951:             found = 1;
 1952:         else if (node < seq->buffer[middle - 1].node)
 1953:             upper = middle - 1;
 1954:         else
 1955:             lower = middle + 1;
 1956:     }
 1957: 
 1958:     /* Return position */
 1959:     if (middle == 0 || seq->buffer[middle - 1].node < node)
 1960:         return middle;
 1961:     else
 1962:         return middle - 1;
 1963: }
 1964: 
 1965: 
 1966: /**
 1967:  * xmlParserAddNodeInfo:
 1968:  * @ctxt:  an XML parser context
 1969:  * @info:  a node info sequence pointer
 1970:  *
 1971:  * Insert node info record into the sorted sequence
 1972:  */
 1973: void
 1974: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
 1975:                      const xmlParserNodeInfoPtr info)
 1976: {
 1977:     unsigned long pos;
 1978: 
 1979:     if ((ctxt == NULL) || (info == NULL)) return;
 1980: 
 1981:     /* Find pos and check to see if node is already in the sequence */
 1982:     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
 1983:                                      info->node);
 1984: 
 1985:     if ((pos < ctxt->node_seq.length) &&
 1986:         (ctxt->node_seq.buffer != NULL) &&
 1987:         (ctxt->node_seq.buffer[pos].node == info->node)) {
 1988:         ctxt->node_seq.buffer[pos] = *info;
 1989:     }
 1990: 
 1991:     /* Otherwise, we need to add new node to buffer */
 1992:     else {
 1993:         if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
 1994:             xmlParserNodeInfo *tmp_buffer;
 1995:             unsigned int byte_size;
 1996: 
 1997:             if (ctxt->node_seq.maximum == 0)
 1998:                 ctxt->node_seq.maximum = 2;
 1999:             byte_size = (sizeof(*ctxt->node_seq.buffer) *
 2000: 			(2 * ctxt->node_seq.maximum));
 2001: 
 2002:             if (ctxt->node_seq.buffer == NULL)
 2003:                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
 2004:             else
 2005:                 tmp_buffer =
 2006:                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
 2007:                                                      byte_size);
 2008: 
 2009:             if (tmp_buffer == NULL) {
 2010: 		xmlErrMemory(ctxt, "failed to allocate buffer\n");
 2011:                 return;
 2012:             }
 2013:             ctxt->node_seq.buffer = tmp_buffer;
 2014:             ctxt->node_seq.maximum *= 2;
 2015:         }
 2016: 
 2017:         /* If position is not at end, move elements out of the way */
 2018:         if (pos != ctxt->node_seq.length) {
 2019:             unsigned long i;
 2020: 
 2021:             for (i = ctxt->node_seq.length; i > pos; i--)
 2022:                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
 2023:         }
 2024: 
 2025:         /* Copy element and increase length */
 2026:         ctxt->node_seq.buffer[pos] = *info;
 2027:         ctxt->node_seq.length++;
 2028:     }
 2029: }
 2030: 
 2031: /************************************************************************
 2032:  *									*
 2033:  *		Default settings					*
 2034:  *									*
 2035:  ************************************************************************/
 2036: /**
 2037:  * xmlPedanticParserDefault:
 2038:  * @val:  int 0 or 1
 2039:  *
 2040:  * Set and return the previous value for enabling pedantic warnings.
 2041:  *
 2042:  * Returns the last value for 0 for no substitution, 1 for substitution.
 2043:  */
 2044: 
 2045: int
 2046: xmlPedanticParserDefault(int val) {
 2047:     int old = xmlPedanticParserDefaultValue;
 2048: 
 2049:     xmlPedanticParserDefaultValue = val;
 2050:     return(old);
 2051: }
 2052: 
 2053: /**
 2054:  * xmlLineNumbersDefault:
 2055:  * @val:  int 0 or 1
 2056:  *
 2057:  * Set and return the previous value for enabling line numbers in elements
 2058:  * contents. This may break on old application and is turned off by default.
 2059:  *
 2060:  * Returns the last value for 0 for no substitution, 1 for substitution.
 2061:  */
 2062: 
 2063: int
 2064: xmlLineNumbersDefault(int val) {
 2065:     int old = xmlLineNumbersDefaultValue;
 2066: 
 2067:     xmlLineNumbersDefaultValue = val;
 2068:     return(old);
 2069: }
 2070: 
 2071: /**
 2072:  * xmlSubstituteEntitiesDefault:
 2073:  * @val:  int 0 or 1
 2074:  *
 2075:  * Set and return the previous value for default entity support.
 2076:  * Initially the parser always keep entity references instead of substituting
 2077:  * entity values in the output. This function has to be used to change the
 2078:  * default parser behavior
 2079:  * SAX::substituteEntities() has to be used for changing that on a file by
 2080:  * file basis.
 2081:  *
 2082:  * Returns the last value for 0 for no substitution, 1 for substitution.
 2083:  */
 2084: 
 2085: int
 2086: xmlSubstituteEntitiesDefault(int val) {
 2087:     int old = xmlSubstituteEntitiesDefaultValue;
 2088: 
 2089:     xmlSubstituteEntitiesDefaultValue = val;
 2090:     return(old);
 2091: }
 2092: 
 2093: /**
 2094:  * xmlKeepBlanksDefault:
 2095:  * @val:  int 0 or 1
 2096:  *
 2097:  * Set and return the previous value for default blanks text nodes support.
 2098:  * The 1.x version of the parser used an heuristic to try to detect
 2099:  * ignorable white spaces. As a result the SAX callback was generating
 2100:  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
 2101:  * using the DOM output text nodes containing those blanks were not generated.
 2102:  * The 2.x and later version will switch to the XML standard way and
 2103:  * ignorableWhitespace() are only generated when running the parser in
 2104:  * validating mode and when the current element doesn't allow CDATA or
 2105:  * mixed content.
 2106:  * This function is provided as a way to force the standard behavior
 2107:  * on 1.X libs and to switch back to the old mode for compatibility when
 2108:  * running 1.X client code on 2.X . Upgrade of 1.X code should be done
 2109:  * by using xmlIsBlankNode() commodity function to detect the "empty"
 2110:  * nodes generated.
 2111:  * This value also affect autogeneration of indentation when saving code
 2112:  * if blanks sections are kept, indentation is not generated.
 2113:  *
 2114:  * Returns the last value for 0 for no substitution, 1 for substitution.
 2115:  */
 2116: 
 2117: int
 2118: xmlKeepBlanksDefault(int val) {
 2119:     int old = xmlKeepBlanksDefaultValue;
 2120: 
 2121:     xmlKeepBlanksDefaultValue = val;
 2122:     if (!val) xmlIndentTreeOutput = 1;
 2123:     return(old);
 2124: }
 2125: 
 2126: #define bottom_parserInternals
 2127: #include "elfgcchack.h"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>