--- embedaddon/libxml2/parser.c 2013/07/22 01:22:20 1.1.1.2 +++ embedaddon/libxml2/parser.c 2014/06/15 19:53:29 1.1.1.3 @@ -17,7 +17,7 @@ * parserInternals.c to reduce this file size. * As much as possible the functions are associated with their relative * production in the XML specification. A few productions defining the - * different ranges of character are actually implanted either in + * different ranges of character are actually implanted either in * parserInternals.h or parserInternals.c * The DOM tree build is realized from the default SAX callbacks in * the module SAX.c. @@ -40,6 +40,7 @@ #endif #include +#include #include #include #include @@ -83,6 +84,9 @@ #include #endif +#include "buf.h" +#include "enc.h" + static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); @@ -117,17 +121,34 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, * parser option. */ static int -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, - xmlEntityPtr ent) +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, + xmlEntityPtr ent, size_t replacement) { - unsigned long consumed = 0; + size_t consumed = 0; if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) return (0); if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) return (1); - if (size != 0) { + if (replacement != 0) { + if (replacement < XML_MAX_TEXT_LENGTH) + return(0); + /* + * If the volume of entity copy reaches 10 times the + * amount of parsed data and over the large text threshold + * then that's very likely to be an abuse. + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if (replacement < XML_PARSER_NON_LINEAR * consumed) + return(0); + } else if (size != 0) { + /* * Do the check based on the replacement size of the entity */ if (size < XML_PARSER_BIG_ENTITY) @@ -149,7 +170,7 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned l /* * use the number of parsed entities in the replacement */ - size = ent->checked; + size = ent->checked / 2; /* * The amount of data parsed counting entities size only once @@ -172,7 +193,6 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned l */ return (0); } - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return (1); } @@ -194,6 +214,17 @@ unsigned int xmlParserMaxDepth = 256; #define XML_PARSER_BUFFER_SIZE 100 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" +/** + * XML_PARSER_CHUNK_SIZE + * + * When calling GROW that's the minimal amount of data + * the parser expected to have received. It is not a hard + * limit but an optimization when reading strings like Names + * It is not strictly needed as long as inputs available characters + * are followed by 0, which should be provided by the I/O level + */ +#define XML_PARSER_CHUNK_SIZE 100 + /* * List of XML prefixed PI allowed by W3C specs */ @@ -233,7 +264,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityP /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -285,193 +316,201 @@ static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) { const char *errmsg; + char errstr[129] = ""; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; switch (error) { case XML_ERR_INVALID_HEX_CHARREF: - errmsg = "CharRef: invalid hexadecimal value\n"; + errmsg = "CharRef: invalid hexadecimal value"; break; case XML_ERR_INVALID_DEC_CHARREF: - errmsg = "CharRef: invalid decimal value\n"; + errmsg = "CharRef: invalid decimal value"; break; case XML_ERR_INVALID_CHARREF: - errmsg = "CharRef: invalid value\n"; + errmsg = "CharRef: invalid value"; break; case XML_ERR_INTERNAL_ERROR: errmsg = "internal error"; break; case XML_ERR_PEREF_AT_EOF: - errmsg = "PEReference at end of document\n"; + errmsg = "PEReference at end of document"; break; case XML_ERR_PEREF_IN_PROLOG: - errmsg = "PEReference in prolog\n"; + errmsg = "PEReference in prolog"; break; case XML_ERR_PEREF_IN_EPILOG: - errmsg = "PEReference in epilog\n"; + errmsg = "PEReference in epilog"; break; case XML_ERR_PEREF_NO_NAME: - errmsg = "PEReference: no name\n"; + errmsg = "PEReference: no name"; break; case XML_ERR_PEREF_SEMICOL_MISSING: - errmsg = "PEReference: expecting ';'\n"; + errmsg = "PEReference: expecting ';'"; break; case XML_ERR_ENTITY_LOOP: - errmsg = "Detected an entity reference loop\n"; + errmsg = "Detected an entity reference loop"; break; case XML_ERR_ENTITY_NOT_STARTED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ENTITY_PE_INTERNAL: - errmsg = "PEReferences forbidden in internal subset\n"; + errmsg = "PEReferences forbidden in internal subset"; break; case XML_ERR_ENTITY_NOT_FINISHED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ATTRIBUTE_NOT_STARTED: - errmsg = "AttValue: \" or ' expected\n"; + errmsg = "AttValue: \" or ' expected"; break; case XML_ERR_LT_IN_ATTRIBUTE: - errmsg = "Unescaped '<' not allowed in attributes values\n"; + errmsg = "Unescaped '<' not allowed in attributes values"; break; case XML_ERR_LITERAL_NOT_STARTED: - errmsg = "SystemLiteral \" or ' expected\n"; + errmsg = "SystemLiteral \" or ' expected"; break; case XML_ERR_LITERAL_NOT_FINISHED: - errmsg = "Unfinished System or Public ID \" or ' expected\n"; + errmsg = "Unfinished System or Public ID \" or ' expected"; break; case XML_ERR_MISPLACED_CDATA_END: - errmsg = "Sequence ']]>' not allowed in content\n"; + errmsg = "Sequence ']]>' not allowed in content"; break; case XML_ERR_URI_REQUIRED: - errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; + errmsg = "SYSTEM or PUBLIC, the URI is missing"; break; case XML_ERR_PUBID_REQUIRED: - errmsg = "PUBLIC, the Public Identifier is missing\n"; + errmsg = "PUBLIC, the Public Identifier is missing"; break; case XML_ERR_HYPHEN_IN_COMMENT: - errmsg = "Comment must not contain '--' (double-hyphen)\n"; + errmsg = "Comment must not contain '--' (double-hyphen)"; break; case XML_ERR_PI_NOT_STARTED: - errmsg = "xmlParsePI : no target name\n"; + errmsg = "xmlParsePI : no target name"; break; case XML_ERR_RESERVED_XML_NAME: - errmsg = "Invalid PI name\n"; + errmsg = "Invalid PI name"; break; case XML_ERR_NOTATION_NOT_STARTED: - errmsg = "NOTATION: Name expected here\n"; + errmsg = "NOTATION: Name expected here"; break; case XML_ERR_NOTATION_NOT_FINISHED: - errmsg = "'>' required to close NOTATION declaration\n"; + errmsg = "'>' required to close NOTATION declaration"; break; case XML_ERR_VALUE_REQUIRED: - errmsg = "Entity value required\n"; + errmsg = "Entity value required"; break; case XML_ERR_URI_FRAGMENT: errmsg = "Fragment not allowed"; break; case XML_ERR_ATTLIST_NOT_STARTED: - errmsg = "'(' required to start ATTLIST enumeration\n"; + errmsg = "'(' required to start ATTLIST enumeration"; break; case XML_ERR_NMTOKEN_REQUIRED: - errmsg = "NmToken expected in ATTLIST enumeration\n"; + errmsg = "NmToken expected in ATTLIST enumeration"; break; case XML_ERR_ATTLIST_NOT_FINISHED: - errmsg = "')' required to finish ATTLIST enumeration\n"; + errmsg = "')' required to finish ATTLIST enumeration"; break; case XML_ERR_MIXED_NOT_STARTED: - errmsg = "MixedContentDecl : '|' or ')*' expected\n"; + errmsg = "MixedContentDecl : '|' or ')*' expected"; break; case XML_ERR_PCDATA_REQUIRED: - errmsg = "MixedContentDecl : '#PCDATA' expected\n"; + errmsg = "MixedContentDecl : '#PCDATA' expected"; break; case XML_ERR_ELEMCONTENT_NOT_STARTED: - errmsg = "ContentDecl : Name or '(' expected\n"; + errmsg = "ContentDecl : Name or '(' expected"; break; case XML_ERR_ELEMCONTENT_NOT_FINISHED: - errmsg = "ContentDecl : ',' '|' or ')' expected\n"; + errmsg = "ContentDecl : ',' '|' or ')' expected"; break; case XML_ERR_PEREF_IN_INT_SUBSET: errmsg = - "PEReference: forbidden within markup decl in internal subset\n"; + "PEReference: forbidden within markup decl in internal subset"; break; case XML_ERR_GT_REQUIRED: - errmsg = "expected '>'\n"; + errmsg = "expected '>'"; break; case XML_ERR_CONDSEC_INVALID: - errmsg = "XML conditional section '[' expected\n"; + errmsg = "XML conditional section '[' expected"; break; case XML_ERR_EXT_SUBSET_NOT_FINISHED: - errmsg = "Content error in the external subset\n"; + errmsg = "Content error in the external subset"; break; case XML_ERR_CONDSEC_INVALID_KEYWORD: errmsg = - "conditional section INCLUDE or IGNORE keyword expected\n"; + "conditional section INCLUDE or IGNORE keyword expected"; break; case XML_ERR_CONDSEC_NOT_FINISHED: - errmsg = "XML conditional section not closed\n"; + errmsg = "XML conditional section not closed"; break; case XML_ERR_XMLDECL_NOT_STARTED: - errmsg = "Text declaration '' expected\n"; + errmsg = "parsing XML declaration: '?>' expected"; break; case XML_ERR_EXT_ENTITY_STANDALONE: - errmsg = "external parsed entities cannot be standalone\n"; + errmsg = "external parsed entities cannot be standalone"; break; case XML_ERR_ENTITYREF_SEMICOL_MISSING: - errmsg = "EntityRef: expecting ';'\n"; + errmsg = "EntityRef: expecting ';'"; break; case XML_ERR_DOCTYPE_NOT_FINISHED: - errmsg = "DOCTYPE improperly terminated\n"; + errmsg = "DOCTYPE improperly terminated"; break; case XML_ERR_LTSLASH_REQUIRED: - errmsg = "EndTag: 'errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], info); if (ctxt != NULL) { ctxt->wellFormed = 0; @@ -626,7 +665,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErro */ static void xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, int val, + const char *msg, const xmlChar *str1, int val, const xmlChar *str2) { if ((ctxt != NULL) && (ctxt->disableSAX != 0) && @@ -754,7 +793,7 @@ xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error /************************************************************************ * * - * Library wide options * + * Library wide options * * * ************************************************************************/ @@ -978,7 +1017,7 @@ xmlHasFeature(xmlFeature feature) /************************************************************************ * * - * SAX2 defaulted attributes handling * + * SAX2 defaulted attributes handling * * * ************************************************************************/ @@ -1002,8 +1041,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) { ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); - if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || - (ctxt->str_xml_ns == NULL)) { + if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || + (ctxt->str_xml_ns == NULL)) { xmlErrMemory(ctxt, NULL); } } @@ -1517,7 +1556,7 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, c { if (ctxt->options & XML_PARSE_NSCLEAN) { int i; - for (i = 0;i < ctxt->nsNr;i += 2) { + for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { if (ctxt->nsTab[i] == prefix) { /* in scope */ if (ctxt->nsTab[i + 1] == URL) @@ -1922,7 +1961,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { * to compare on ASCII based substring. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined * strings without newlines within the parser. - * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII + * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII * defined char within the parser. * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding * @@ -1971,10 +2010,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) { #define SKIPL(val) do { \ int skipl; \ for(skipl=0; skiplinput->cur) == '\n') { \ + if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ - } else ctxt->input->col++; \ - ctxt->nbChars++; \ + } else ctxt->input->col++; \ + ctxt->nbChars++; \ ctxt->input->cur++; \ } \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ @@ -2000,6 +2039,13 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) { xmlGROW (ctxt); static void xmlGROW (xmlParserCtxtPtr ctxt) { + if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + ctxt->instate = XML_PARSER_EOF; + } xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) @@ -2144,6 +2190,8 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); } ret = inputPush(ctxt, input); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); GROW; return(ret); } @@ -2159,7 +2207,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error */ @@ -2180,8 +2228,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 16 + (CUR - '0'); else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) val = val * 16 + (CUR - 'a') + 10; @@ -2211,8 +2261,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 10 + (CUR - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -2238,7 +2290,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -2263,7 +2315,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error, str will be * updated to the current value of the index @@ -2282,7 +2334,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml ptr += 3; cur = *ptr; while (cur != ';') { /* Non input consuming loop */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) val = val * 16 + (cur - 'a') + 10; @@ -2305,7 +2357,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml ptr += 2; cur = *ptr; while (cur != ';') { /* Non input consuming loops */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -2329,7 +2381,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -2351,9 +2403,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml * * Returns the new input stream or NULL */ - + static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} - + static xmlParserInputPtr xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; @@ -2376,7 +2428,7 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, if (buffer == NULL) { xmlErrMemory(ctxt, NULL); xmlFree(input); - return(NULL); + return(NULL); } buffer [0] = ' '; buffer [1] = '%'; @@ -2395,12 +2447,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, /** * xmlParserHandlePEReference: * @ctxt: the parser context - * + * * [69] PEReference ::= '%' Name ';' * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -2418,9 +2470,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, * NOTE: misleading but this is handled. * * A PEReference may have been detected in the current input stream - * the handling is done accordingly to + * the handling is done accordingly to * http://www.w3.org/TR/REC-xml#entproc - * i.e. + * i.e. * - Included in literal in entity values * - Included as Parameter Entity reference within DTDs */ @@ -2497,8 +2549,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) + return; if (entity == NULL) { - + /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an @@ -2524,7 +2578,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); - } else + } else xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); @@ -2549,7 +2603,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if (xmlPushInput(ctxt, input) < 0) return; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -2559,6 +2613,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * the amount of data in the buffer. */ GROW + if (ctxt->instate == XML_PARSER_EOF) + return; if ((ctxt->input->end - ctxt->input->cur)>=4) { start[0] = RAW; start[1] = NXT(1); @@ -2589,15 +2645,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { /* * Macro used to grow the current buffer. + * buffer##_size is expected to be a size_t + * mem_error: is expected to handle memory allocation failures */ #define growBuffer(buffer, n) { \ xmlChar *tmp; \ - buffer##_size *= 2; \ - buffer##_size += n; \ - tmp = (xmlChar *) \ - xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + size_t new_size = buffer##_size * 2 + n; \ + if (new_size < buffer##_size) goto mem_error; \ + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ if (tmp == NULL) goto mem_error; \ buffer = tmp; \ + buffer##_size = new_size; \ } /** @@ -2609,7 +2667,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -2623,14 +2681,14 @@ xmlChar * xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; - int buffer_size = 0; + size_t buffer_size = 0; + size_t nbchars = 0; xmlChar *current = NULL; xmlChar *rep = NULL; const xmlChar *last; xmlEntityPtr ent; int c,l; - int nbchars = 0; if ((ctxt == NULL) || (str == NULL) || (len < 0)) return(NULL); @@ -2647,7 +2705,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; - buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); + buffer = (xmlChar *) xmlMallocAtomic(buffer_size); if (buffer == NULL) goto mem_error; /* @@ -2667,7 +2725,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { @@ -2680,12 +2738,12 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) goto int_error; if (ent != NULL) - ctxt->nbentities += ent->checked; + ctxt->nbentities += ent->checked / 2; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else { @@ -2702,9 +2760,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2717,7 +2774,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; - if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); } for (;i > 0;i--) @@ -2732,7 +2789,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) goto int_error; if (ent != NULL) - ctxt->nbentities += ent->checked; + ctxt->nbentities += ent->checked / 2; if (ent != NULL) { if (ent->content == NULL) { xmlLoadEntityContent(ctxt, ent); @@ -2745,9 +2802,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2759,8 +2815,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons } else { COPY_BUF(l,buffer,nbchars,c); str += l; - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } if (str < last) @@ -2789,7 +2845,7 @@ int_error: * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -3152,7 +3208,7 @@ xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { } else { if ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || - (c == '_') || (c == ':') || + (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) return(1); @@ -3177,6 +3233,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { * Handler for more complex cases */ GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); if ((ctxt->options & XML_PARSE_OLD10) == 0) { /* @@ -3225,9 +3283,11 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { ((c >= 0xFDF0) && (c <= 0xFFFD)) || ((c >= 0x10000) && (c <= 0xEFFFF)) )) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); @@ -3246,18 +3306,32 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || - (c == '_') || (c == ':') || + (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); @@ -3307,6 +3381,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -3342,21 +3421,40 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ (xmlIsNameChar(ctxt, c) && (c != ':'))) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); } /** * xmlParseNCName: * @ctxt: an XML parser context - * @len: lenght of the string parsed + * @len: length of the string parsed * * parse an XML name. * @@ -3394,6 +3492,11 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -3425,6 +3528,8 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const xmlChar *ret; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { @@ -3460,7 +3565,7 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar * * [6] Names ::= Name (#x20 Name)* * - * Returns the Name parsed or NULL. The @str pointer + * Returns the Name parsed or NULL. The @str pointer * is updated to the current location in the string. */ @@ -3504,6 +3609,13 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlCha while (xmlIsNameChar(ctxt, c)) { if (len + 10 > max) { xmlChar *tmp; + + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -3523,6 +3635,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlCha return(buffer); } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } *str = cur; return(xmlStrndup(buf, len)); } @@ -3552,16 +3669,25 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { #endif GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); while (xmlIsNameChar(ctxt, c)) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay @@ -3577,13 +3703,23 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } memcpy(buffer, buf, len); while (xmlIsNameChar(ctxt, c)) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buffer); + return(NULL); + } } if (len + 10 > max) { xmlChar *tmp; + if ((max > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -3604,6 +3740,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } if (len == 0) return(NULL); + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + return(NULL); + } return(xmlStrndup(buf, len)); } @@ -3650,6 +3791,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o ctxt->instate = XML_PARSER_ENTITY_VALUE; input = ctxt->input; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } NEXT; c = CUR_CHAR(l); /* @@ -3657,12 +3802,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o * When a parameter entity reference appears in a literal entity * value, ... a single or double quote character in the replacement * text is always treated as a normal data character and will not - * terminate the literal. + * terminate the literal. * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ - while ((IS_CHAR(c)) && ((c != stop) || /* checked */ - (ctxt->input != input))) { + while (((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { if (len + 5 >= size) { xmlChar *tmp; @@ -3691,6 +3836,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o } } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } /* * Raise problem w.r.t. '&' and '%' being used in non-entities @@ -3738,12 +3887,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o */ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 0, 0, 0); - if (orig != NULL) + if (orig != NULL) *orig = buf; else xmlFree(buf); } - + return(ret); } @@ -3764,8 +3913,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at xmlChar limit = 0; xmlChar *buf = NULL; xmlChar *rep = NULL; - int len = 0; - int buf_size = 0; + size_t len = 0; + size_t buf_size = 0; int c, l, in_space = 0; xmlChar *current = NULL; xmlEntityPtr ent; @@ -3787,15 +3936,26 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); + buf = (xmlChar *) xmlMallocAtomic(buf_size); if (buf == NULL) goto mem_error; /* * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); - while ((NXT(0) != limit) && /* checked */ - (IS_CHAR(c)) && (c != '<')) { + while (((NXT(0) != limit) && /* checked */ + (IS_CHAR(c)) && (c != '<')) && + (ctxt->instate != XML_PARSER_EOF)) { + /* + * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE + * special option is given + */ + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } if (c == 0) break; if (c == '&') { in_space = 0; @@ -3804,7 +3964,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at if (val == '&') { if (ctxt->replaceEntities) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3813,7 +3973,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c */ - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3823,7 +3983,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at buf[len++] = ';'; } } else if (val != 0) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } len += xmlCopyChar(0, &buf[len], val); @@ -3835,7 +3995,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at ctxt->nbentities += ent->owner; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if ((ctxt->replaceEntities == 0) && @@ -3848,7 +4008,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } else { buf[len++] = ent->content[0]; } - } else if ((ent != NULL) && + } else if ((ent != NULL) && (ctxt->replaceEntities != 0)) { if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { rep = xmlStringDecodeEntities(ctxt, ent->content, @@ -3863,7 +4023,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at current++; } else buf[len++] = *current++; - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3871,7 +4031,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at rep = NULL; } } else { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if (ent->content != NULL) @@ -3886,10 +4046,16 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * entities problems */ if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL)) { + (ent->content != NULL) && (ent->checked == 0)) { + unsigned long oldnbent = ctxt->nbentities; + rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, 0, 0, 0); + + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; if (rep != NULL) { + if (xmlStrchr(rep, '<')) + ent->checked |= 1; xmlFree(rep); rep = NULL; } @@ -3899,7 +4065,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * Just output the reference */ buf[len++] = '&'; - while (len > buf_size - i - 10) { + while (len + i + 10 > buf_size) { growBuffer(buf, i + 10); } for (;i > 0;i--) @@ -3912,7 +4078,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); - while (len > buf_size - 10) { + while (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3921,7 +4087,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } else { in_space = 0; COPY_BUF(l,buf,len,c); - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3930,8 +4096,11 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at GROW; c = CUR_CHAR(l); } + if (ctxt->instate == XML_PARSER_EOF) + goto error; + if ((in_space) && (normalize)) { - while (buf[len - 1] == 0x20) len--; + while ((len > 0) && (buf[len - 1] == 0x20)) len--; } buf[len] = 0; if (RAW == '<') { @@ -3946,11 +4115,23 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } } else NEXT; - if (attlen != NULL) *attlen = len; + + /* + * There we potentially risk an overflow, don't allow attribute value of + * length more than INT_MAX it is a very reasonnable assumption ! + */ + if (len >= INT_MAX) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } + + if (attlen != NULL) *attlen = (int) len; return(buf); mem_error: xmlErrMemory(ctxt, NULL); +error: if (buf != NULL) xmlFree(buf); if (rep != NULL) @@ -3971,20 +4152,20 @@ mem_error: * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -4001,7 +4182,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { /** * xmlParseSystemLiteral: * @ctxt: an XML parser context - * + * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") @@ -4030,7 +4211,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); return(NULL); } - + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); if (buf == NULL) { xmlErrMemory(ctxt, NULL); @@ -4042,6 +4223,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); + xmlFree(buf); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -4056,6 +4244,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -4119,6 +4311,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (len + 1 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); + xmlFree(buf); + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -4133,6 +4331,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } NEXT; cur = CUR; @@ -4203,7 +4405,7 @@ static const unsigned char test_char_data[256] = { * The right angle bracket (>) may be represented using the string ">", * and must, for compatibility, be escaped using ">" or a character * reference when it appears in the string "]]>" in content, when that - * string is not marking the end of a CDATA section. + * string is not marking the end of a CDATA section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ @@ -4339,6 +4541,8 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; in = ctxt->input->cur; } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); nbchar = 0; @@ -4368,7 +4572,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cda GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ - (cur != '&') && + (cur != '&') && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { @@ -4407,6 +4611,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cda if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) + return; } NEXTL(l); cur = CUR_CHAR(l); @@ -4499,7 +4705,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **pu } } else { /* - * We handle [83] so we return immediately, if + * We handle [83] so we return immediately, if * "S SystemLiteral" is not detected. From a purely parsing * point of view that's a nice mess. */ @@ -4508,7 +4714,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **pu ptr = CUR_PTR; if (!IS_BLANK_CH(*ptr)) return(NULL); - + while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ if ((*ptr != '\'') && (*ptr != '"')) return(NULL); } @@ -4536,11 +4742,12 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **pu * [15] Comment ::= '' */ static void -xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, + size_t len, size_t size) { int q, ql; int r, rl; int cur, l; - int count = 0; + size_t count = 0; int inputid; inputid = ctxt->input->id; @@ -4586,16 +4793,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar if ((r == '-') && (q == '-')) { xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } if (len + 5 >= size) { xmlChar *new_buf; - size *= 2; - new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size; + + new_size = size * 2; + new_buf = (xmlChar *) xmlRealloc(buf, new_size); if (new_buf == NULL) { xmlFree (buf); xmlErrMemory(ctxt, NULL); return; } buf = new_buf; + size = new_size; } COPY_BUF(ql,buf,len,q); q = r; @@ -4607,6 +4824,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } } NEXTL(l); cur = CUR_CHAR(l); @@ -4656,11 +4877,12 @@ not_terminated: void xmlParseComment(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int size = XML_PARSER_BUFFER_SIZE; - int len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; xmlParserInputState state; const xmlChar *in; - int nbchar = 0, ccol; + size_t nbchar = 0; + int ccol; int inputid; /* @@ -4740,6 +4962,13 @@ get_more: buf[len] = 0; } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } ctxt->input->cur = in; if (*in == 0xA) { in++; @@ -4757,6 +4986,10 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } in = ctxt->input->cur; if (*in == '-') { if (in[1] == '-') { @@ -4775,7 +5008,8 @@ get_more: } if (buf != NULL) xmlFree(buf); - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; return; } if (buf != NULL) { @@ -4803,7 +5037,7 @@ get_more: /** * xmlParsePITarget: * @ctxt: an XML parser context - * + * * parse the name of a PI * * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) @@ -4840,7 +5074,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { NULL, NULL); } if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { - xmlNsErr(ctxt, XML_NS_ERR_COLON, + xmlNsErr(ctxt, XML_NS_ERR_COLON, "colon are forbidden from PI names '%s'\n", name, NULL, NULL); } return(name); @@ -4851,7 +5085,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { * xmlParseCatalogPI: * @ctxt: an XML parser context * @catalog: the PI value string - * + * * parse an XML Catalog Processing Instruction. * * @@ -4911,7 +5145,7 @@ error: /** * xmlParsePI: * @ctxt: an XML parser context - * + * * parse an XML Processing Instruction. * * [16] PI ::= '' Char*)))? '?>' @@ -4922,8 +5156,8 @@ error: void xmlParsePI(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int len = 0; - int size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; int cur, l; const xmlChar *target; xmlParserInputState state; @@ -4980,9 +5214,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { ((cur != '?') || (NXT(1) != '>'))) { if (len + 5 >= size) { xmlChar *tmp; - - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size = size * 2; + tmp = (xmlChar *) xmlRealloc(buf, new_size); if (tmp == NULL) { xmlErrMemory(ctxt, NULL); xmlFree(buf); @@ -4990,11 +5223,24 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { return; } buf = tmp; + size = new_size; } count++; if (count > 50) { GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } count = 0; + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, + "PI %s too big found", target); + xmlFree(buf); + ctxt->instate = state; + return; + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -5005,6 +5251,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { cur = CUR_CHAR(l); } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, + "PI %s too big found", target); + xmlFree(buf); + ctxt->instate = state; + return; + } buf[len] = 0; if (cur != '?') { xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, @@ -5066,7 +5320,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { const xmlChar *name; xmlChar *Pubid; xmlChar *Systemid; - + if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { xmlParserInputPtr input = ctxt->input; SHRINK; @@ -5089,7 +5343,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { return; } if (xmlStrchr(name, ':') != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_COLON, + xmlNsErr(ctxt, XML_NS_ERR_COLON, "colon are forbidden from notation names '%s'\n", name, NULL, NULL); } @@ -5149,7 +5403,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { int isParameter = 0; xmlChar *orig = NULL; int skipped; - + /* GROW; done in the caller */ if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { xmlParserInputPtr input = ctxt->input; @@ -5178,7 +5432,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { return; } if (xmlStrchr(name, ':') != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_COLON, + xmlNsErr(ctxt, XML_NS_ERR_COLON, "colon are forbidden from entities names '%s'\n", name, NULL, NULL); } @@ -5343,6 +5597,8 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { } } } + if (ctxt->instate == XML_PARSER_EOF) + return; SKIP_BLANKS; if (RAW != '>') { xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, @@ -5406,13 +5662,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { * * [ VC: Fixed Attribute Default ] * if an attribute has a default value declared with the #FIXED - * keyword, instances of that attribute must match the default value. + * keyword, instances of that attribute must match the default value. * * [ WFC: No < in Attribute Values ] * handled in xmlParseAttValue() * * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED - * or XML_ATTRIBUTE_FIXED. + * or XML_ATTRIBUTE_FIXED. */ int @@ -5461,7 +5717,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **v * * [ VC: Notation Attributes ] * Values of this type must match one of the notation names included - * in the declaration; all notation names in the declaration must be declared. + * in the declaration; all notation names in the declaration must be declared. * * Returns: the notation attribute tree built while parsing */ @@ -5661,15 +5917,15 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnume * [ VC: Entity Name ] * Values of type ENTITY must match the Name production, values * of type ENTITIES must match Names; each Entity Name must match the - * name of an unparsed entity declared in the DTD. + * name of an unparsed entity declared in the DTD. * * [ VC: Name Token ] * Values of type NMTOKEN must match the Nmtoken production; values - * of type NMTOKENS must match Nmtokens. + * of type NMTOKENS must match Nmtokens. * * Returns the attribute type */ -int +int xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { SHRINK; if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { @@ -5734,7 +5990,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { } SKIP_BLANKS; GROW; - while (RAW != '>') { + while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; int type; int def; @@ -5812,7 +6068,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { xmlFreeEnumeration(tree); if ((ctxt->sax2) && (defaultValue != NULL) && - (def != XML_ATTRIBUTE_IMPLIED) && + (def != XML_ATTRIBUTE_IMPLIED) && (def != XML_ATTRIBUTE_REQUIRED)) { xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); } @@ -5841,7 +6097,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * * parse the declaration for a Mixed Element content * The leading '(' and spaces have been skipped in xmlParseElementContentDecl - * + * * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | * '(' S? '#PCDATA' S? ')' * @@ -5849,7 +6105,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * * [ VC: No Duplicate Types ] * The same name must not appear more than once in a single - * mixed-content declaration. + * mixed-content declaration. * * returns: the list of the xmlElementContentPtr describing the element choices */ @@ -5883,7 +6139,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); if (ret == NULL) return(NULL); } - while (RAW == '|') { + while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { NEXT; if (elem == NULL) { ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); @@ -5949,8 +6205,8 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, * * parse the declaration for a Mixed Element content * The leading '(' and spaces have been skipped in xmlParseElementContentDecl - * * + * * [47] children ::= (choice | seq) ('?' | '*' | '+')? * * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? @@ -5970,7 +6226,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, * be empty, and neither the first nor last non-blank character of * the replacement text should be a connector (| or ,). * - * Returns the tree of xmlElementContentPtr describing the element + * Returns the tree of xmlElementContentPtr describing the element * hierarchy. */ static xmlElementContentPtr @@ -6027,7 +6283,7 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPt } SKIP_BLANKS; SHRINK; - while (RAW != ')') { + while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { /* * Each loop we parse one separator and one element. */ @@ -6283,7 +6539,7 @@ xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ct * * parse the declaration for an Element content either Mixed or Children, * the cases EMPTY and ANY are handled directly in xmlParseElementDecl - * + * * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children * * returns: the type of element content XML_ELEMENT_TYPE_xxx @@ -6306,6 +6562,8 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, cons } NEXT; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(-1); SKIP_BLANKS; if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { tree = xmlParseElementMixedContentDecl(ctxt, inputid); @@ -6409,7 +6667,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element declaration doesn't start and stop in the same entity\n"); } - + NEXT; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && (ctxt->sax->elementDecl != NULL)) { @@ -6421,7 +6679,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { /* * this is a trick: if xmlAddElementDecl is called, * instead of copying the full tree it is plugged directly - * if called from the parser. Avoid duplicating the + * if called from the parser. Avoid duplicating the * interfaces or change the API/ABI */ xmlFreeDocElementContent(ctxt->myDoc, content); @@ -6438,8 +6696,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { * xmlParseConditionalSections * @ctxt: an XML parser context * - * [61] conditionalSect ::= includeSect | ignoreSect - * [62] includeSect ::= '' + * [61] conditionalSect ::= includeSect | ignoreSect + * [62] includeSect ::= '' * [63] ignoreSect ::= '' * [64] ignoreSectContents ::= Ignore ('' Ignore)* * [65] Ignore ::= Char* - (Char* ('') Char*) @@ -6473,8 +6731,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { "Entering INCLUDE Conditional Section\n"); } - while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || - (NXT(2) != '>'))) { + while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || + (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -6542,7 +6800,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (ctxt->recovery == 0) ctxt->disableSAX = 1; ctxt->instate = XML_PARSER_IGNORE; - while ((depth >= 0) && (RAW != 0)) { + while (((depth >= 0) && (RAW != 0)) && + (ctxt->instate != XML_PARSER_EOF)) { if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { depth++; SKIP(3); @@ -6590,7 +6849,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { /** * xmlParseMarkupDecl: * @ctxt: an XML parser context - * + * * parse Markup declarations * * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | @@ -6607,7 +6866,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { * In the internal DTD subset, parameter-entity references can occur * only where markup declarations can occur, not within markup declarations. * (This does not apply to references that occur in external parameter - * entities or to the external subset.) + * entities or to the external subset.) */ void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { @@ -6736,7 +6995,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { * @ctxt: an XML parser context * @ExternalID: the external identifier * @SystemID: the system identifier (or URL) - * + * * parse Markup declarations from an external subset * * [30] extSubset ::= textDecl? extSubsetDecl @@ -6813,7 +7072,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xm break; } } - + if (RAW != 0) { xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); } @@ -6915,8 +7174,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * The first reference to the entity trigger a parsing phase * where the ent->children is filled with the result from * the parsing. + * Note: external parsed entities will not be loaded, it is not + * required for a non-validating parser, unless the parsing option + * of validating, or substituting entities were given. Doing so is + * far more secure as the parser will only process data coming from + * the document entity by default. */ - if (ent->checked == 0) { + if ((ent->checked == 0) && + ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || + (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { unsigned long oldnbent = ctxt->nbentities; /* @@ -6958,13 +7224,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * Store the number of entities needing parsing for this entity * content and do checkings */ - ent->checked = ctxt->nbentities - oldnbent; + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; + if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) + ent->checked |= 1; if (ret == XML_ERR_ENTITY_LOOP) { xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); xmlFreeNodeList(list); return; } - if (xmlParserEntityCheck(ctxt, 0, ent)) { + if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { xmlFreeNodeList(list); return; } @@ -7023,9 +7291,9 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { list = NULL; } if (ent->checked == 0) - ent->checked = 1; + ent->checked = 2; } else if (ent->checked != 1) { - ctxt->nbentities += ent->checked; + ctxt->nbentities += ent->checked / 2; } /* @@ -7116,14 +7384,19 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * Seems we are generating the DOM content, do * a simple tree copy for all references except the first * In the first occurrence list contains the replacement. - * progressive == 2 means we are operating on the Reader - * and since nodes are discarded we must copy all the time. */ if (((list == NULL) && (ent->owner == 0)) || (ctxt->parseMode == XML_PARSE_READER)) { xmlNodePtr nw = NULL, cur, firstChild = NULL; /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + + /* * when operating on a reader, the entities definitions * are always owning the entities subtree. if (ctxt->parseMode == XML_PARSE_READER) @@ -7160,10 +7433,18 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) xmlAddEntityReference(ent, firstChild, nw); #endif /* LIBXML_LEGACY_ENABLED */ - } else if (list == NULL) { + } else if ((list == NULL) || (ctxt->inputNr > 0)) { xmlNodePtr nw = NULL, cur, next, last, firstChild = NULL; + /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + + /* * Copy the entity child list and make it the new * entity child list. The goal is to make sure any * ID or REF referenced will be the one from the @@ -7260,6 +7541,8 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { xmlEntityPtr ent = NULL; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (RAW != '&') return(NULL); @@ -7277,7 +7560,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { NEXT; /* - * Predefined entites override any extra definition + * Predefined entities override any extra definition */ if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { ent = xmlGetPredefinedEntity(name); @@ -7286,7 +7569,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { } /* - * Increate the number of entity references parsed + * Increase the number of entity references parsed */ ctxt->nbentities++; @@ -7297,7 +7580,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { if (ctxt->sax != NULL) { if (ctxt->sax->getEntity != NULL) ent = ctxt->sax->getEntity(ctxt->userData, name); - if ((ctxt->wellFormed == 1 ) && (ent == NULL) && + if ((ctxt->wellFormed == 1 ) && (ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) ent = xmlGetPredefinedEntity(name); if ((ctxt->wellFormed == 1 ) && (ent == NULL) && @@ -7305,6 +7588,8 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { ent = xmlSAX2GetEntity(ctxt, name); } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an @@ -7368,14 +7653,16 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { * [ WFC: No < in Attribute Values ] * The replacement text of any entity referred to directly or * indirectly in an attribute value (other than "<") must - * not contain a <. + * not contain a <. */ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent != NULL) && (ent->content != NULL) && - (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (xmlStrchr(ent->content, '<'))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, - "'<' in entity '%s' is not allowed in attributes values\n", name); + (ent != NULL) && + (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { + if ((ent->checked & 1) || ((ent->checked == 0) && + (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, + "'<' in entity '%s' is not allowed in attributes values\n", name); + } } /* @@ -7397,7 +7684,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { /* * [ WFC: No Recursion ] * A parsed entity must not contain a recursive reference - * to itself, either directly or indirectly. + * to itself, either directly or indirectly. * Done somewhere else */ return(ent); @@ -7495,6 +7782,10 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const x ent = xmlSAX2GetEntity(ctxt, name); } } + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(name); + return(NULL); + } /* * [ WFC: Entity Declared ] @@ -7515,7 +7806,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const x * is not obligated to read and process their declarations; * for such documents, the rule that an entity must be * declared is a well-formedness constraint only if - * standalone='yes'. + * standalone='yes'. */ if (ent == NULL) { if ((ctxt->standalone == 1) || @@ -7606,7 +7897,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const x * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -7656,8 +7947,9 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) */ if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); + entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) + return; if (entity == NULL) { /* * [ WFC: Entity Declared ] @@ -7787,12 +8079,25 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityP while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && (IS_CHAR(c))) { xmlBufferAdd(buf, ctxt->input->cur, l); - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); + } } NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); + } + c = CUR_CHAR(l); + } } if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { @@ -7881,8 +8186,11 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const */ if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); + entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(name); + return(NULL); + } if (entity == NULL) { /* * [ WFC: Entity Declared ] @@ -7932,12 +8240,12 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const * * parse a DOCTYPE declaration * - * [28] doctypedecl ::= '' * * [ VC: Root Element Type ] * The Name in the document type declaration must match the element - * type of the root element. + * type of the root element. */ void @@ -7984,6 +8292,8 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && (!ctxt->disableSAX)) ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); + if (ctxt->instate == XML_PARSER_EOF) + return; /* * Is there any internal subset declarations ? @@ -8019,11 +8329,11 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { ctxt->instate = XML_PARSER_DTD; NEXT; /* - * Parse the succession of Markup declarations and + * Parse the succession of Markup declarations and * PEReferences. * Subsequence (markupdecl | PEReference | S)* */ - while (RAW != ']') { + while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -8043,7 +8353,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { break; } } - if (RAW == ']') { + if (RAW == ']') { NEXT; SKIP_BLANKS; } @@ -8074,8 +8384,8 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { * * [ WFC: No < in Attribute Values ] * The replacement text of any entity referred to directly or indirectly in - * an attribute value (other than "<") must not contain a <. - * + * an attribute value (other than "<") must not contain a <. + * * [ VC: Attribute Value Type ] * The attribute must have been declared; the value must be of the type * declared for it. @@ -8156,7 +8466,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **val /** * xmlParseStartTag: * @ctxt: an XML parser context - * + * * parse a start of tag either for rule element or * EmptyElement. In both case we don't parse the tag closing chars. * @@ -8164,13 +8474,13 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **val * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * With namespace: * @@ -8209,9 +8519,9 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { SKIP_BLANKS; GROW; - while ((RAW != '>') && + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && - (IS_BYTE_CHAR(RAW))) { + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *q = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -8220,7 +8530,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same - * start-tag or empty-element tag. + * start-tag or empty-element tag. */ for (i = 0; i < nbatts;i += 2) { if (xmlStrEqual(atts[i], attname)) { @@ -8269,7 +8579,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { xmlFree(attvalue); } -failed: +failed: GROW if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) @@ -8351,7 +8661,7 @@ xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { /* * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ if (name != (xmlChar*)1) { @@ -8447,7 +8757,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **p if (CUR == ':') { l = xmlParseName(ctxt); if (l != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_QNAME, + xmlNsErr(ctxt, XML_NS_ERR_QNAME, "Failed to parse QName '%s'\n", l, NULL, NULL); *prefix = NULL; return(l); @@ -8530,7 +8840,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar cmp = prefix; while (*in != 0 && *in == *cmp) { - ++in; + ++in; ++cmp; } if ((*cmp == 0) && (*in == ':')) { @@ -8568,20 +8878,20 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -8627,7 +8937,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *l /* * Skip any leading spaces */ - while ((in < end) && (*in != limit) && + while ((in < end) && (*in != limit) && ((*in == 0x20) || (*in == 0x9) || (*in == 0xA) || (*in == 0xD))) { in++; @@ -8635,12 +8945,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *l if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } while ((in < end) && (*in != limit) && (*in >= 0x20) && @@ -8649,12 +8967,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *l if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } last = in; @@ -8662,13 +8988,15 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *l * skip the trailing blanks */ while ((last[-1] == 0x20) && (last > start)) last--; - while ((in < end) && (*in != limit) && + while ((in < end) && (*in != limit) && ((*in == 0x20) || (*in == 0x9) || (*in == 0xA) || (*in == 0xD))) { in++; if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; @@ -8676,8 +9004,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *l last = last + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } if (*in != limit) goto need_complex; } else { while ((in < end) && (*in != limit) && (*in >= 0x20) && @@ -8686,15 +9026,29 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *l if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } } } last = in; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + return(NULL); + } if (*in != limit) goto need_complex; } in++; @@ -8833,7 +9187,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, /** * xmlParseStartTag2: * @ctxt: an XML parser context - * + * * parse a start of tag either for rule element or * EmptyElement. In both case we don't parse the tag closing chars. * This routine is called when running SAX2 parsing @@ -8842,13 +9196,13 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * With namespace: * @@ -8917,9 +9271,9 @@ reparse: GROW; if (ctxt->input->base != base) goto base_changed; - while ((RAW != '>') && + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && - (IS_BYTE_CHAR(RAW))) { + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *q = CUR_PTR; unsigned int cons = ctxt->input->consumed; int len = -1, alloc = 0; @@ -9090,6 +9444,8 @@ skip_ns: failed: GROW + if (ctxt->instate == XML_PARSER_EOF) + break; if (ctxt->input->base != base) goto base_changed; if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) break; @@ -9181,7 +9537,7 @@ failed: atts[nbatts++] = defaults->values[5 * i + 3]; if ((ctxt->standalone == 1) && (defaults->values[5 * i + 4] != NULL)) { - xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, "standalone: attribute %s on %s defaulted from external subset\n", attname, localname); } @@ -9211,7 +9567,7 @@ failed: /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same - * start-tag or empty-element tag. + * start-tag or empty-element tag. * As extended by the Namespace in XML REC. */ for (j = 0; j < i;j += 5) { @@ -9327,6 +9683,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar * * We should definitely be at the ending "S? '>'" part */ GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; SKIP_BLANKS; if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); @@ -9336,7 +9694,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar * /* * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ if (name != (xmlChar*)1) { @@ -9365,7 +9723,7 @@ done: /** * xmlParseCDSect: * @ctxt: an XML parser context - * + * * Parse escaped pure raw content. * * [18] CDSect ::= CDStart CData CDEnd @@ -9418,14 +9776,21 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if ((size > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, + "CData section too big found", NULL); + xmlFree (buf); + return; + } + tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); if (tmp == NULL) { xmlFree(buf); xmlErrMemory(ctxt, NULL); return; } buf = tmp; + size *= 2; } COPY_BUF(rl,buf,len,r); r = s; @@ -9435,6 +9800,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { count++; if (count > 50) { GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } count = 0; } NEXTL(l); @@ -9514,7 +9883,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { /* * Fifth case : a reference. If if has not been resolved, - * parsing returns it's Name, create the node + * parsing returns it's Name, create the node */ else if (*cur == '&') { @@ -9555,7 +9924,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { * * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ @@ -9614,7 +9983,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { /* * [ VC: Root Element Type ] * The Name in the document type declaration must match the element - * type of the root element. + * type of the root element. */ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->node && (ctxt->node == ctxt->myDoc->children)) @@ -9683,6 +10052,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * Parse the content of the element: */ xmlParseContent(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + return; if (!IS_BYTE_CHAR(RAW)) { xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, "Premature end of data in tag %s line %d\n", @@ -9895,7 +10266,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { /** * xmlParseEncodingDecl: * @ctxt: an XML parser context - * + * * parse the XML encoding declaration * * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") @@ -9952,7 +10323,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { /* * If no encoding was passed to the parser, that we are - * using UTF-16 and no decoder is present i.e. the + * using UTF-16 and no decoder is present i.e. the * document is apparently UTF-8 compatible, then raise an * encoding mismatch fatal error */ @@ -10003,7 +10374,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { * parse the XML standalone declaration * * [32] SDDecl ::= S 'standalone' Eq - * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) + * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) * * [ VC: Standalone Document Declaration ] * TODO The standalone document declaration must have the value "no" @@ -10083,7 +10454,7 @@ xmlParseSDDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseXMLDecl: * @ctxt: an XML parser context - * + * * parse an XML declaration header * * [23] XMLDecl ::= '' @@ -10197,7 +10568,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseMisc: * @ctxt: an XML parser context - * + * * parse an XML Misc* optional field. * * [27] Misc ::= Comment | PI | S @@ -10205,9 +10576,10 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { void xmlParseMisc(xmlParserCtxtPtr ctxt) { - while (((RAW == '<') && (NXT(1) == '?')) || - (CMP4(CUR_PTR, '<', '!', '-', '-')) || - IS_BLANK_CH(CUR)) { + while ((ctxt->instate != XML_PARSER_EOF) && + (((RAW == '<') && (NXT(1) == '?')) || + (CMP4(CUR_PTR, '<', '!', '-', '-')) || + IS_BLANK_CH(CUR))) { if ((RAW == '<') && (NXT(1) == '?')) { xmlParsePI(ctxt); } else if (IS_BLANK_CH(CUR)) { @@ -10220,7 +10592,7 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) { /** * xmlParseDocument: * @ctxt: an XML parser context - * + * * parse an XML document (and build a tree if using the standard SAX * interface). * @@ -10254,10 +10626,12 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { */ if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); if ((ctxt->encoding == NULL) && ((ctxt->input->end - ctxt->input->cur) >= 4)) { - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -10305,6 +10679,8 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { } if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); /* * The Misc part of the Prolog @@ -10324,6 +10700,8 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { if (RAW == '[') { ctxt->instate = XML_PARSER_DTD; xmlParseInternalSubset(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); } /* @@ -10334,6 +10712,8 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { (!ctxt->disableSAX)) ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, ctxt->extSubSystem, ctxt->extSubURI); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); ctxt->inSubset = 0; xmlCleanSpecialAttr(ctxt); @@ -10400,7 +10780,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { /** * xmlParseExtParsedEnt: * @ctxt: an XML parser context - * + * * parse a general parsed entity * An external general parsed entity is well-formed if it matches the * production labeled extParsedEnt. @@ -10431,7 +10811,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -10474,6 +10854,8 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { } if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); /* * Doing validity checking on chunk doesn't make sense @@ -10484,7 +10866,9 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { ctxt->depth = 0; xmlParseContent(ctxt); - + if (ctxt->instate == XML_PARSER_EOF) + return(-1); + if ((RAW == '<') && (NXT(1) == '/')) { xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); } else if (RAW != 0) { @@ -10504,7 +10888,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { #ifdef LIBXML_PUSH_ENABLED /************************************************************************ * * - * Progressive parsing interfaces * + * Progressive parsing interfaces * * * ************************************************************************/ @@ -10541,8 +10925,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar buf = in->base; len = in->length; } else { - buf = in->buf->buffer->content; - len = in->buf->buffer->use; + buf = xmlBufContent(in->buf->buffer); + len = xmlBufUse(in->buf->buffer); } /* take into account the sequence length */ if (third) len -= 2; @@ -10565,7 +10949,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c' found at %d\n", first, next, base); - else + else xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c%c' found at %d\n", first, next, third, base); @@ -10581,7 +10965,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar else if (third == 0) xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c' failed\n", first, next); - else + else xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c%c' failed\n", first, next, third); #endif @@ -10663,7 +11047,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { if ((utf == NULL) || (len <= 0)) return(0); - + for (ix = 0; ix < len;) { /* string is 0-terminated */ c = utf[ix]; if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ @@ -10791,11 +11175,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina } xmlParseGetLasts(ctxt, &lastlt, &lastgt); - while (1) { + while (ctxt->instate != XML_PARSER_EOF) { if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(0); - + /* * Pop-up of finished entities. */ @@ -10810,22 +11194,22 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina /* * If we are operating on converted input, try to flush * remainng chars to avoid them stalling in the non-converted - * buffer. + * buffer. But do not do this in document start where + * encoding="..." may not have been read and we work on a + * guessed encoding. */ - if ((ctxt->input->buf->raw != NULL) && - (ctxt->input->buf->raw->use > 0)) { - int base = ctxt->input->base - - ctxt->input->buf->buffer->content; - int current = ctxt->input->cur - ctxt->input->base; + if ((ctxt->instate != XML_PARSER_START) && + (ctxt->input->buf->raw != NULL) && + (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, + ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, 0, ""); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + current; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ - ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, + base, current); } - avail = ctxt->input->buf->buffer->use - + avail = xmlBufUse(ctxt->input->buf->buffer) - (ctxt->input->cur - ctxt->input->base); } if (avail < 1) @@ -10847,7 +11231,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina if (avail < 4) goto done; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines, @@ -11002,7 +11386,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina /* * [ VC: Root Element Type ] * The Name in the document type declaration must match - * the element type of the root element. + * the element type of the root element. */ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->node && (ctxt->node == ctxt->myDoc->children)) @@ -11031,12 +11415,15 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina ctxt->sax->endElement(ctxt->userData, name); #endif /* LIBXML_SAX1_ENABLED */ } + if (ctxt->instate == XML_PARSER_EOF) + goto done; spacePop(ctxt); if (ctxt->nameNr == 0) { ctxt->instate = XML_PARSER_EPILOG; } else { ctxt->instate = XML_PARSER_CONTENT; } + ctxt->progressive = 1; break; } if (RAW == '>') { @@ -11056,6 +11443,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina #endif /* LIBXML_SAX1_ENABLED */ ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; break; } case XML_PARSER_CONTENT: { @@ -11073,9 +11461,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina break; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; } else if ((cur == '<') && (next != '!')) { ctxt->instate = XML_PARSER_START_TAG; break; @@ -11089,10 +11481,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina ctxt->input->cur += 4; term = xmlParseLookupSequence(ctxt, '-', '-', '>'); ctxt->input->cur -= 4; - if ((!terminate) && (term < 0)) + if ((!terminate) && (term < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } xmlParseComment(ctxt); ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && (ctxt->input->cur[2] == '[') && (ctxt->input->cur[3] == 'C') && @@ -11187,7 +11582,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina break; case XML_PARSER_CDATA_SECTION: { /* - * The Push mode need to have the SAX callback for + * The Push mode need to have the SAX callback for * cdataBlock merge back contiguous callbacks. */ int base; @@ -11197,7 +11592,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { int tmp; - tmp = xmlCheckCdataPush(ctxt->input->cur, + tmp = xmlCheckCdataPush(ctxt->input->cur, XML_PARSER_BIG_BUFFER_SIZE); if (tmp < 0) { tmp = -tmp; @@ -11212,6 +11607,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina ctxt->sax->characters(ctxt->userData, ctxt->input->cur, tmp); } + if (ctxt->instate == XML_PARSER_EOF) + goto done; SKIPL(tmp); ctxt->checkIndex = 0; } @@ -11247,6 +11644,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina ctxt->sax->characters(ctxt->userData, ctxt->input->cur, base); } + if (ctxt->instate == XML_PARSER_EOF) + goto done; SKIPL(base + 3); ctxt->checkIndex = 0; ctxt->instate = XML_PARSER_CONTENT; @@ -11263,7 +11662,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - + avail = xmlBufUse(ctxt->input->buf->buffer) - (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; @@ -11271,26 +11670,37 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; + ctxt->instate = XML_PARSER_MISC; + ctxt->progressive = 1; ctxt->checkIndex = 0; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_MISC; + ctxt->progressive = 1; ctxt->checkIndex = 0; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == 'D') && @@ -11301,14 +11711,20 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina (ctxt->input->cur[7] == 'P') && (ctxt->input->cur[8] == 'E')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { + ctxt->progressive = XML_PARSER_DTD; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing internal subset\n"); #endif ctxt->inSubset = 1; + ctxt->progressive = 0; + ctxt->checkIndex = 0; xmlParseDocTypeDecl(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; if (RAW == '[') { ctxt->instate = XML_PARSER_DTD; #ifdef DEBUG_PUSH @@ -11338,7 +11754,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina goto done; } else { ctxt->instate = XML_PARSER_START_TAG; - ctxt->progressive = 1; + ctxt->progressive = XML_PARSER_START_TAG; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -11351,38 +11767,50 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina if (ctxt->input->buf == NULL) avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); - if (avail < 2) + avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); + if (avail < 2) goto done; cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; + ctxt->instate = XML_PARSER_PROLOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_PROLOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; if (ctxt->progressive == 0) - ctxt->progressive = 1; + ctxt->progressive = XML_PARSER_START_TAG; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -11395,32 +11823,43 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina if (ctxt->input->buf == NULL) avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_EPILOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_EPILOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; @@ -11450,29 +11889,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina int base, i; xmlChar *buf; xmlChar quote = 0; + size_t use; base = ctxt->input->cur - ctxt->input->base; if (base < 0) return(0); if (ctxt->checkIndex > base) base = ctxt->checkIndex; - buf = ctxt->input->buf->buffer->content; - for (;(unsigned int) base < ctxt->input->buf->buffer->use; - base++) { + buf = xmlBufContent(ctxt->input->buf->buffer); + use = xmlBufUse(ctxt->input->buf->buffer); + for (;(unsigned int) base < use; base++) { if (quote != 0) { if (buf[base] == quote) quote = 0; - continue; + continue; } if ((quote == 0) && (buf[base] == '<')) { int found = 0; /* special handling of comments */ - if (((unsigned int) base + 4 < - ctxt->input->buf->buffer->use) && + if (((unsigned int) base + 4 < use) && (buf[base + 1] == '!') && (buf[base + 2] == '-') && (buf[base + 3] == '-')) { - for (;(unsigned int) base + 3 < - ctxt->input->buf->buffer->use; base++) { + for (;(unsigned int) base + 3 < use; base++) { if ((buf[base] == '-') && (buf[base + 1] == '-') && (buf[base + 2] == '>')) { @@ -11503,17 +11941,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina fprintf(stderr, "%c%c%c%c: ", buf[base], buf[base + 1], buf[base + 2], buf[base + 3]); #endif - if ((unsigned int) base +1 >= - ctxt->input->buf->buffer->use) + if ((unsigned int) base +1 >= use) break; if (buf[base + 1] == ']') { /* conditional crap, skip both ']' ! */ base++; continue; } - for (i = 1; - (unsigned int) base + i < ctxt->input->buf->buffer->use; - i++) { + for (i = 1; (unsigned int) base + i < use; i++) { if (buf[base + i] == '>') { #if 0 fprintf(stderr, "found\n"); @@ -11531,7 +11966,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina fprintf(stderr, "end of stream\n"); #endif break; - + } not_end_of_int_subset: continue; /* for */ @@ -11539,6 +11974,10 @@ not_end_of_int_subset: /* * We didn't found the end of the Internal subset */ + if (quote == 0) + ctxt->checkIndex = base; + else + ctxt->checkIndex = 0; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, @@ -11547,7 +11986,10 @@ not_end_of_int_subset: goto done; found_end_int_subset: + ctxt->checkIndex = 0; xmlParseInternalSubset(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->inSubset = 2; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && (ctxt->sax->externalSubset != NULL)) @@ -11555,6 +11997,8 @@ found_end_int_subset: ctxt->extSubSystem, ctxt->extSubURI); ctxt->inSubset = 0; xmlCleanSpecialAttr(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + goto done; ctxt->instate = XML_PARSER_PROLOG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH @@ -11637,7 +12081,7 @@ found_end_int_subset: break; } } -done: +done: #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); #endif @@ -11657,6 +12101,55 @@ encoding_error: } /** + * xmlParseCheckTransition: + * @ctxt: an XML parser context + * @chunk: a char array + * @size: the size in byte of the chunk + * + * Check depending on the current parser state if the chunk given must be + * processed immediately or one need more data to advance on parsing. + * + * Returns -1 in case of error, 0 if the push is not needed and 1 if needed + */ +static int +xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { + if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) + return(-1); + if (ctxt->instate == XML_PARSER_START_TAG) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->progressive == XML_PARSER_COMMENT) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->instate == XML_PARSER_CDATA_SECTION) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->progressive == XML_PARSER_PI) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->instate == XML_PARSER_END_TAG) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if ((ctxt->progressive == XML_PARSER_DTD) || + (ctxt->instate == XML_PARSER_DTD)) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + return(1); +} + +/** * xmlParseChunk: * @ctxt: an XML parser context * @chunk: an char array @@ -11672,11 +12165,15 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk int terminate) { int end_in_lf = 0; int remain = 0; + size_t old_avail = 0; + size_t avail = 0; if (ctxt == NULL) return(XML_ERR_INTERNAL_ERROR); if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(ctxt->errNo); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); if (ctxt->instate == XML_PARSER_START) xmlDetectSAX2(ctxt); if ((size > 0) && (chunk != NULL) && (!terminate) && @@ -11689,10 +12186,11 @@ xmldecl_done: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; int res; + old_avail = xmlBufUse(ctxt->input->buf->buffer); /* * Specific handling if we autodetected an encoding, we should not * push more than the first line ... which depend on the encoding @@ -11728,16 +12226,13 @@ xmldecl_done: remain = 0; } } - res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); if (res < 0) { ctxt->errNo = XML_PARSER_EOF; ctxt->disableSAX = 1; return (XML_PARSER_EOF); } - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -11748,21 +12243,48 @@ xmldecl_done: if ((in->encoder != NULL) && (in->buffer != NULL) && (in->raw != NULL)) { int nbchars; + size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; - nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + nbchars = xmlCharEncInput(in, terminate); if (nbchars < 0) { /* TODO 2.6.0 */ xmlGenericError(xmlGenericErrorContext, "xmlParseChunk: encoder error\n"); return(XML_ERR_INVALID_ENCODING); } + xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); } } } - if (remain != 0) + if (remain != 0) { xmlParseTryOrFinish(ctxt, 0); - else - xmlParseTryOrFinish(ctxt, terminate); + } else { + if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) + avail = xmlBufUse(ctxt->input->buf->buffer); + /* + * Depending on the current state it may not be such + * a good idea to try parsing if there is nothing in the chunk + * which would be worth doing a parser state transition and we + * need to wait for more data + */ + if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || + (old_avail == 0) || (avail == 0) || + (xmlParseCheckTransition(ctxt, + (const char *)&ctxt->input->base[old_avail], + avail - old_avail))) + xmlParseTryOrFinish(ctxt, terminate); + } + if (ctxt->instate == XML_PARSER_EOF) + return(ctxt->errNo); + + if ((ctxt->input != NULL) && + (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + ctxt->instate = XML_PARSER_EOF; + } if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(ctxt->errNo); @@ -11774,28 +12296,35 @@ xmldecl_done: } if ((end_in_lf == 1) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, + ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; + xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); + + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, + base, current); } if (terminate) { /* * Check for termination */ - int avail = 0; + int cur_avail = 0; if (ctxt->input != NULL) { if (ctxt->input->buf == NULL) - avail = ctxt->input->length - - (ctxt->input->cur - ctxt->input->base); + cur_avail = ctxt->input->length - + (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - - (ctxt->input->cur - ctxt->input->base); + cur_avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); } - + if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->instate != XML_PARSER_EPILOG)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); - } - if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { + } + if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); } if (ctxt->instate != XML_PARSER_EOF) { @@ -11804,12 +12333,15 @@ xmldecl_done: } ctxt->instate = XML_PARSER_EOF; } - return((xmlParserErrors) ctxt->errNo); + if (ctxt->wellFormed == 0) + return((xmlParserErrors) ctxt->errNo); + else + return(0); } /************************************************************************ * * - * I/O front end functions to the parser * + * I/O front end functions to the parser * * * ************************************************************************/ @@ -11833,7 +12365,7 @@ xmldecl_done: */ xmlParserCtxtPtr -xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, +xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename) { xmlParserCtxtPtr ctxt; xmlParserInputPtr inputStream; @@ -11882,7 +12414,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *us memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); if (user_data != NULL) ctxt->userData = user_data; - } + } if (filename == NULL) { ctxt->directory = NULL; } else { @@ -11908,11 +12440,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *us } } inputStream->buf = buf; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = - &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; - + xmlBufResetInput(inputStream->buf->buffer, inputStream); inputPush(ctxt, inputStream); /* @@ -11923,15 +12451,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *us if ((size == 0) || (chunk == NULL)) { ctxt->charset = XML_CHAR_ENCODING_NONE; } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; - xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -11951,11 +12476,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *us * * Blocks further parser processing */ -void +void xmlStopParser(xmlParserCtxtPtr ctxt) { if (ctxt == NULL) return; ctxt->instate = XML_PARSER_EOF; + ctxt->errNo = XML_ERR_USER_STOP; ctxt->disableSAX = 1; if (ctxt->input != NULL) { ctxt->input->cur = BAD_CAST""; @@ -12032,7 +12558,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user #ifdef LIBXML_VALID_ENABLED /************************************************************************ * * - * Front ends when parsing a DTD * + * Front ends when parsing a DTD * * * ************************************************************************/ @@ -12043,7 +12569,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user * @enc: the charset encoding if known * * Load and parse a DTD - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. * @input will be freed by the function in any case. */ @@ -12068,7 +12594,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBuff /* * Set-up the SAX context */ - if (sax != NULL) { + if (sax != NULL) { if (ctxt->sax != NULL) xmlFree(ctxt->sax); ctxt->sax = sax; @@ -12122,7 +12648,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBuff if ((enc == XML_CHAR_ENCODING_NONE) && ((ctxt->input->end - ctxt->input->cur) >= 4)) { - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -12161,7 +12687,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBuff } if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12172,7 +12698,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBuff * @SystemID: a NAME* containing the URL to the DTD * * Load and parse an external subset. - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. */ @@ -12195,13 +12721,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *Ex /* * Set-up the SAX context */ - if (sax != NULL) { + if (sax != NULL) { if (ctxt->sax != NULL) xmlFree(ctxt->sax); ctxt->sax = sax; ctxt->userData = ctxt; } - + /* * Canonicalise the system ID */ @@ -12312,7 +12838,7 @@ xmlParseDTD(const xmlChar *ExternalID, const xmlChar * /************************************************************************ * * - * Front ends when parsing an Entity * + * Front ends when parsing an Entity * * * ************************************************************************/ @@ -12428,7 +12954,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const */ if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { - xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, + xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, "Version mismatch between document and entity\n"); } } @@ -12711,7 +13237,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParser if (ctxt->lastError.code != XML_ERR_OK) xmlCopyError(&ctxt->lastError, &oldctxt->lastError); - if (sax != NULL) + if (sax != NULL) ctxt->sax = oldsax; oldctxt->node_seq.maximum = ctxt->node_seq.maximum; oldctxt->node_seq.length = ctxt->node_seq.length; @@ -13530,7 +14056,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xm * @filename: the filename or URL * @options: a combination of xmlParserOption * - * Create a parser context for a file or URL content. + * Create a parser context for a file or URL content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time and for file accesses * @@ -13572,7 +14098,7 @@ xmlCreateURLParserCtxt(const char *filename, int optio * xmlCreateFileParserCtxt: * @filename: the filename * - * Create a parser context for a file content. + * Create a parser context for a file content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. * @@ -13650,7 +14176,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const ch if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -13750,7 +14276,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const x xmlClearParserCtxt(ctxt); return; } - + xmlClearParserCtxt(ctxt); if (filename != NULL) input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); @@ -13768,7 +14294,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const x * * parse an XML file and call the given SAX handler routines. * Automatic support for ZLIB/Compress compressed document is provided - * + * * Returns 0 in case of success or a error number otherwise */ int @@ -13776,7 +14302,7 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_d const char *filename) { int ret = 0; xmlParserCtxtPtr ctxt; - + ctxt = xmlCreateFileParserCtxt(filename); if (ctxt == NULL) return -1; if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) @@ -13786,9 +14312,9 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_d if (user_data != NULL) ctxt->userData = user_data; - + xmlParseDocument(ctxt); - + if (ctxt->wellFormed) ret = 0; else { @@ -13804,14 +14330,14 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_d ctxt->myDoc = NULL; } xmlFreeParserCtxt(ctxt); - + return ret; } #endif /* LIBXML_SAX1_ENABLED */ /************************************************************************ * * - * Front ends when parsing from memory * + * Front ends when parsing from memory * * * ************************************************************************/ @@ -13855,9 +14381,7 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size input->filename = NULL; input->buf = buf; - input->base = input->buf->buffer->content; - input->cur = input->buf->buffer->content; - input->end = &input->buf->buffer->content[input->buf->buffer->use]; + xmlBufResetInput(input->buf->buffer, input); inputPush(ctxt, input); return(ctxt); @@ -13913,7 +14437,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const xmlFreeDoc(ctxt->myDoc); ctxt->myDoc = NULL; } - if (sax != NULL) + if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); @@ -13931,7 +14455,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const * parse an XML in-memory block and use the given SAX function block * to handle the parsing callback. If sax is NULL, fallback to the default * DOM tree building routines. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13946,7 +14470,7 @@ xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *bu * @size: the size of the array * * parse an XML in-memory block and build a tree. - * + * * Returns the resulting document tree */ @@ -14000,7 +14524,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void * ctxt->userData = user_data; xmlParseDocument(ctxt); - + if (ctxt->wellFormed) ret = 0; else { @@ -14016,7 +14540,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void * ctxt->myDoc = NULL; } xmlFreeParserCtxt(ctxt); - + return ret; } #endif /* LIBXML_SAX1_ENABLED */ @@ -14050,7 +14574,7 @@ xmlCreateDocParserCtxt(const xmlChar *cur) { * parse an XML in-memory document and build a tree. * It use the given SAX function block to handle the parsing callback. * If sax is NULL, fallback to the default DOM tree building routines. - * + * * Returns the resulting document tree */ @@ -14065,7 +14589,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cu ctxt = xmlCreateDocParserCtxt(cur); if (ctxt == NULL) return(NULL); - if (sax != NULL) { + if (sax != NULL) { oldsax = ctxt->sax; ctxt->sax = sax; ctxt->userData = NULL; @@ -14082,7 +14606,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cu if (sax != NULL) ctxt->sax = oldsax; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -14091,7 +14615,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cu * @cur: a pointer to an array of xmlChar * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ @@ -14104,8 +14628,8 @@ xmlParseDoc(const xmlChar *cur) { #ifdef LIBXML_LEGACY_ENABLED /************************************************************************ * * - * Specific function to keep track of entities references * - * and used by the XSLT debugger * + * Specific function to keep track of entities references * + * and used by the XSLT debugger * * * ************************************************************************/ @@ -14115,7 +14639,7 @@ static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; * xmlAddEntityReference: * @ent : A valid entity * @firstNode : A valid first node for children of entity - * @lastNode : A valid last node of children entity + * @lastNode : A valid last node of children entity * * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY */ @@ -14144,7 +14668,7 @@ xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) /************************************************************************ * * - * Miscellaneous * + * Miscellaneous * * * ************************************************************************/ @@ -14260,7 +14784,7 @@ xmlCleanupParser(void) { * current scope */ #define DICT_FREE(str) \ - if ((str) && ((!dict) || \ + if ((str) && ((!dict) || \ (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ xmlFree((char *)(str)); @@ -14275,7 +14799,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) { xmlParserInputPtr input; xmlDictPtr dict; - + if (ctxt == NULL) return; @@ -14343,6 +14867,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->catalogs = NULL; ctxt->nbentities = 0; ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); if (ctxt->attsDefault != NULL) { @@ -14427,25 +14952,18 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *ch inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) filename); inputStream->buf = buf; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = - &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; + xmlBufResetInput(buf->buffer, inputStream); inputPush(ctxt, inputStream); if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> - use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -14596,6 +15114,8 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int o if (options & XML_PARSE_HUGE) { ctxt->options |= XML_PARSE_HUGE; options -= XML_PARSE_HUGE; + if (ctxt->dict != NULL) + xmlDictSetLimit(ctxt->dict, 0); } if (options & XML_PARSE_OLDSAX) { ctxt->options |= XML_PARSE_OLDSAX; @@ -14605,6 +15125,10 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int o ctxt->options |= XML_PARSE_IGNORE_ENC; options -= XML_PARSE_IGNORE_ENC; } + if (options & XML_PARSE_BIG_LINES) { + ctxt->options |= XML_PARSE_BIG_LINES; + options -= XML_PARSE_BIG_LINES; + } ctxt->linenumbers = 1; return (options); } @@ -14679,7 +15203,7 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, cons * @options: a combination of xmlParserOption * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14703,7 +15227,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const * @options: a combination of xmlParserOption * * parse an XML file from the filesystem or the network. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14726,7 +15250,7 @@ xmlReadFile(const char *filename, const char *encoding * @options: a combination of xmlParserOption * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14750,7 +15274,7 @@ xmlReadMemory(const char *buffer, int size, const char * parse an XML from a file descriptor and build a tree. * NOTE that the file descriptor will not be closed when the * reader is closed or reset. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14871,7 +15395,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * * * parse an XML file from the filesystem or the network. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14906,7 +15430,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *fil * * parse an XML in-memory document and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14950,7 +15474,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *b * This reuses the existing @ctxt parser context * NOTE that the file descriptor will not be closed when the * reader is closed or reset. - * + * * Returns the resulting document tree */ xmlDocPtr