--- embedaddon/libxml2/parser.c 2012/02/21 23:37:57 1.1 +++ embedaddon/libxml2/parser.c 2014/06/15 20:01:53 1.1.1.3.2.1 @@ -17,7 +17,7 @@ * parserInternals.c to reduce this file size. * As much as possible the functions are associated with their relative * production in the XML specification. A few productions defining the - * different ranges of character are actually implanted either in + * different ranges of character are actually implanted either in * parserInternals.h or parserInternals.c * The DOM tree build is realized from the default SAX callbacks in * the module SAX.c. @@ -40,6 +40,7 @@ #endif #include +#include #include #include #include @@ -79,7 +80,13 @@ #ifdef HAVE_ZLIB_H #include #endif +#ifdef HAVE_LZMA_H +#include +#endif +#include "buf.h" +#include "enc.h" + static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); @@ -114,17 +121,34 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, * parser option. */ static int -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, - xmlEntityPtr ent) +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, + xmlEntityPtr ent, size_t replacement) { - unsigned long consumed = 0; + size_t consumed = 0; if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) return (0); if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) return (1); - if (size != 0) { + if (replacement != 0) { + if (replacement < XML_MAX_TEXT_LENGTH) + return(0); + /* + * If the volume of entity copy reaches 10 times the + * amount of parsed data and over the large text threshold + * then that's very likely to be an abuse. + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if (replacement < XML_PARSER_NON_LINEAR * consumed) + return(0); + } else if (size != 0) { + /* * Do the check based on the replacement size of the entity */ if (size < XML_PARSER_BIG_ENTITY) @@ -146,7 +170,7 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned l /* * use the number of parsed entities in the replacement */ - size = ent->checked; + size = ent->checked / 2; /* * The amount of data parsed counting entities size only once @@ -169,7 +193,6 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned l */ return (0); } - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return (1); } @@ -191,12 +214,24 @@ unsigned int xmlParserMaxDepth = 256; #define XML_PARSER_BUFFER_SIZE 100 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" +/** + * XML_PARSER_CHUNK_SIZE + * + * When calling GROW that's the minimal amount of data + * the parser expected to have received. It is not a hard + * limit but an optimization when reading strings like Names + * It is not strictly needed as long as inputs available characters + * are followed by 0, which should be provided by the I/O level + */ +#define XML_PARSER_CHUNK_SIZE 100 + /* * List of XML prefixed PI allowed by W3C specs */ static const char *xmlW3CPIs[] = { "xml-stylesheet", + "xml-model", NULL }; @@ -229,7 +264,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityP /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -281,193 +316,201 @@ static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) { const char *errmsg; + char errstr[129] = ""; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; switch (error) { case XML_ERR_INVALID_HEX_CHARREF: - errmsg = "CharRef: invalid hexadecimal value\n"; + errmsg = "CharRef: invalid hexadecimal value"; break; case XML_ERR_INVALID_DEC_CHARREF: - errmsg = "CharRef: invalid decimal value\n"; + errmsg = "CharRef: invalid decimal value"; break; case XML_ERR_INVALID_CHARREF: - errmsg = "CharRef: invalid value\n"; + errmsg = "CharRef: invalid value"; break; case XML_ERR_INTERNAL_ERROR: errmsg = "internal error"; break; case XML_ERR_PEREF_AT_EOF: - errmsg = "PEReference at end of document\n"; + errmsg = "PEReference at end of document"; break; case XML_ERR_PEREF_IN_PROLOG: - errmsg = "PEReference in prolog\n"; + errmsg = "PEReference in prolog"; break; case XML_ERR_PEREF_IN_EPILOG: - errmsg = "PEReference in epilog\n"; + errmsg = "PEReference in epilog"; break; case XML_ERR_PEREF_NO_NAME: - errmsg = "PEReference: no name\n"; + errmsg = "PEReference: no name"; break; case XML_ERR_PEREF_SEMICOL_MISSING: - errmsg = "PEReference: expecting ';'\n"; + errmsg = "PEReference: expecting ';'"; break; case XML_ERR_ENTITY_LOOP: - errmsg = "Detected an entity reference loop\n"; + errmsg = "Detected an entity reference loop"; break; case XML_ERR_ENTITY_NOT_STARTED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ENTITY_PE_INTERNAL: - errmsg = "PEReferences forbidden in internal subset\n"; + errmsg = "PEReferences forbidden in internal subset"; break; case XML_ERR_ENTITY_NOT_FINISHED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ATTRIBUTE_NOT_STARTED: - errmsg = "AttValue: \" or ' expected\n"; + errmsg = "AttValue: \" or ' expected"; break; case XML_ERR_LT_IN_ATTRIBUTE: - errmsg = "Unescaped '<' not allowed in attributes values\n"; + errmsg = "Unescaped '<' not allowed in attributes values"; break; case XML_ERR_LITERAL_NOT_STARTED: - errmsg = "SystemLiteral \" or ' expected\n"; + errmsg = "SystemLiteral \" or ' expected"; break; case XML_ERR_LITERAL_NOT_FINISHED: - errmsg = "Unfinished System or Public ID \" or ' expected\n"; + errmsg = "Unfinished System or Public ID \" or ' expected"; break; case XML_ERR_MISPLACED_CDATA_END: - errmsg = "Sequence ']]>' not allowed in content\n"; + errmsg = "Sequence ']]>' not allowed in content"; break; case XML_ERR_URI_REQUIRED: - errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; + errmsg = "SYSTEM or PUBLIC, the URI is missing"; break; case XML_ERR_PUBID_REQUIRED: - errmsg = "PUBLIC, the Public Identifier is missing\n"; + errmsg = "PUBLIC, the Public Identifier is missing"; break; case XML_ERR_HYPHEN_IN_COMMENT: - errmsg = "Comment must not contain '--' (double-hyphen)\n"; + errmsg = "Comment must not contain '--' (double-hyphen)"; break; case XML_ERR_PI_NOT_STARTED: - errmsg = "xmlParsePI : no target name\n"; + errmsg = "xmlParsePI : no target name"; break; case XML_ERR_RESERVED_XML_NAME: - errmsg = "Invalid PI name\n"; + errmsg = "Invalid PI name"; break; case XML_ERR_NOTATION_NOT_STARTED: - errmsg = "NOTATION: Name expected here\n"; + errmsg = "NOTATION: Name expected here"; break; case XML_ERR_NOTATION_NOT_FINISHED: - errmsg = "'>' required to close NOTATION declaration\n"; + errmsg = "'>' required to close NOTATION declaration"; break; case XML_ERR_VALUE_REQUIRED: - errmsg = "Entity value required\n"; + errmsg = "Entity value required"; break; case XML_ERR_URI_FRAGMENT: errmsg = "Fragment not allowed"; break; case XML_ERR_ATTLIST_NOT_STARTED: - errmsg = "'(' required to start ATTLIST enumeration\n"; + errmsg = "'(' required to start ATTLIST enumeration"; break; case XML_ERR_NMTOKEN_REQUIRED: - errmsg = "NmToken expected in ATTLIST enumeration\n"; + errmsg = "NmToken expected in ATTLIST enumeration"; break; case XML_ERR_ATTLIST_NOT_FINISHED: - errmsg = "')' required to finish ATTLIST enumeration\n"; + errmsg = "')' required to finish ATTLIST enumeration"; break; case XML_ERR_MIXED_NOT_STARTED: - errmsg = "MixedContentDecl : '|' or ')*' expected\n"; + errmsg = "MixedContentDecl : '|' or ')*' expected"; break; case XML_ERR_PCDATA_REQUIRED: - errmsg = "MixedContentDecl : '#PCDATA' expected\n"; + errmsg = "MixedContentDecl : '#PCDATA' expected"; break; case XML_ERR_ELEMCONTENT_NOT_STARTED: - errmsg = "ContentDecl : Name or '(' expected\n"; + errmsg = "ContentDecl : Name or '(' expected"; break; case XML_ERR_ELEMCONTENT_NOT_FINISHED: - errmsg = "ContentDecl : ',' '|' or ')' expected\n"; + errmsg = "ContentDecl : ',' '|' or ')' expected"; break; case XML_ERR_PEREF_IN_INT_SUBSET: errmsg = - "PEReference: forbidden within markup decl in internal subset\n"; + "PEReference: forbidden within markup decl in internal subset"; break; case XML_ERR_GT_REQUIRED: - errmsg = "expected '>'\n"; + errmsg = "expected '>'"; break; case XML_ERR_CONDSEC_INVALID: - errmsg = "XML conditional section '[' expected\n"; + errmsg = "XML conditional section '[' expected"; break; case XML_ERR_EXT_SUBSET_NOT_FINISHED: - errmsg = "Content error in the external subset\n"; + errmsg = "Content error in the external subset"; break; case XML_ERR_CONDSEC_INVALID_KEYWORD: errmsg = - "conditional section INCLUDE or IGNORE keyword expected\n"; + "conditional section INCLUDE or IGNORE keyword expected"; break; case XML_ERR_CONDSEC_NOT_FINISHED: - errmsg = "XML conditional section not closed\n"; + errmsg = "XML conditional section not closed"; break; case XML_ERR_XMLDECL_NOT_STARTED: - errmsg = "Text declaration '' expected\n"; + errmsg = "parsing XML declaration: '?>' expected"; break; case XML_ERR_EXT_ENTITY_STANDALONE: - errmsg = "external parsed entities cannot be standalone\n"; + errmsg = "external parsed entities cannot be standalone"; break; case XML_ERR_ENTITYREF_SEMICOL_MISSING: - errmsg = "EntityRef: expecting ';'\n"; + errmsg = "EntityRef: expecting ';'"; break; case XML_ERR_DOCTYPE_NOT_FINISHED: - errmsg = "DOCTYPE improperly terminated\n"; + errmsg = "DOCTYPE improperly terminated"; break; case XML_ERR_LTSLASH_REQUIRED: - errmsg = "EndTag: 'errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], info); if (ctxt != NULL) { ctxt->wellFormed = 0; @@ -622,7 +665,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErro */ static void xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, int val, + const char *msg, const xmlChar *str1, int val, const xmlChar *str2) { if ((ctxt != NULL) && (ctxt->disableSAX != 0) && @@ -731,7 +774,7 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, * @info1: extra information string * @info2: extra information string * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints + * Handle a namespace warning error */ static void xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, @@ -750,7 +793,7 @@ xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error /************************************************************************ * * - * Library wide options * + * Library wide options * * * ************************************************************************/ @@ -954,6 +997,12 @@ xmlHasFeature(xmlFeature feature) #else return(0); #endif + case XML_WITH_LZMA: +#ifdef LIBXML_LZMA_ENABLED + return(1); +#else + return(0); +#endif case XML_WITH_ICU: #ifdef LIBXML_ICU_ENABLED return(1); @@ -968,7 +1017,7 @@ xmlHasFeature(xmlFeature feature) /************************************************************************ * * - * SAX2 defaulted attributes handling * + * SAX2 defaulted attributes handling * * * ************************************************************************/ @@ -992,8 +1041,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) { ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); - if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || - (ctxt->str_xml_ns == NULL)) { + if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || + (ctxt->str_xml_ns == NULL)) { xmlErrMemory(ctxt, NULL); } } @@ -1507,7 +1556,7 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, c { if (ctxt->options & XML_PARSE_NSCLEAN) { int i; - for (i = 0;i < ctxt->nsNr;i += 2) { + for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { if (ctxt->nsTab[i] == prefix) { /* in scope */ if (ctxt->nsTab[i + 1] == URL) @@ -1819,15 +1868,14 @@ namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) if (ctxt->nameNr >= ctxt->nameMax) { const xmlChar * *tmp; - ctxt->nameMax *= 2; tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, - ctxt->nameMax * + ctxt->nameMax * 2 * sizeof(ctxt->nameTab[0])); if (tmp == NULL) { - ctxt->nameMax /= 2; goto mem_error; } ctxt->nameTab = tmp; + ctxt->nameMax *= 2; } ctxt->nameTab[ctxt->nameNr] = value; ctxt->name = value; @@ -1913,7 +1961,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { * to compare on ASCII based substring. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined * strings without newlines within the parser. - * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII + * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII * defined char within the parser. * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding * @@ -1962,10 +2010,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) { #define SKIPL(val) do { \ int skipl; \ for(skipl=0; skiplinput->cur) == '\n') { \ + if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ - } else ctxt->input->col++; \ - ctxt->nbChars++; \ + } else ctxt->input->col++; \ + ctxt->nbChars++; \ ctxt->input->cur++; \ } \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ @@ -1991,6 +2039,13 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) { xmlGROW (ctxt); static void xmlGROW (xmlParserCtxtPtr ctxt) { + if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + ctxt->instate = XML_PARSER_EOF; + } xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) @@ -2135,6 +2190,8 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); } ret = inputPush(ctxt, input); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); GROW; return(ret); } @@ -2150,7 +2207,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error */ @@ -2171,8 +2228,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 16 + (CUR - '0'); else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) val = val * 16 + (CUR - 'a') + 10; @@ -2202,8 +2261,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 10 + (CUR - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -2229,7 +2290,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -2254,7 +2315,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error, str will be * updated to the current value of the index @@ -2273,7 +2334,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml ptr += 3; cur = *ptr; while (cur != ';') { /* Non input consuming loop */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) val = val * 16 + (cur - 'a') + 10; @@ -2296,7 +2357,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml ptr += 2; cur = *ptr; while (cur != ';') { /* Non input consuming loops */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -2320,7 +2381,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -2342,9 +2403,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xml * * Returns the new input stream or NULL */ - + static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} - + static xmlParserInputPtr xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; @@ -2367,7 +2428,7 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, if (buffer == NULL) { xmlErrMemory(ctxt, NULL); xmlFree(input); - return(NULL); + return(NULL); } buffer [0] = ' '; buffer [1] = '%'; @@ -2386,12 +2447,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, /** * xmlParserHandlePEReference: * @ctxt: the parser context - * + * * [69] PEReference ::= '%' Name ';' * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -2409,9 +2470,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, * NOTE: misleading but this is handled. * * A PEReference may have been detected in the current input stream - * the handling is done accordingly to + * the handling is done accordingly to * http://www.w3.org/TR/REC-xml#entproc - * i.e. + * i.e. * - Included in literal in entity values * - Included as Parameter Entity reference within DTDs */ @@ -2488,8 +2549,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (ctxt->instate == XML_PARSER_EOF) + return; if (entity == NULL) { - + /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an @@ -2515,7 +2578,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); - } else + } else xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); @@ -2532,6 +2595,20 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlCharEncoding enc; /* + * Note: external parsed entities will not be loaded, it is + * not required for a non-validating parser, unless the + * option of validating, or substituting entities were + * given. Doing so is far more secure as the parser will + * only process data coming from the document entity by + * default. + */ + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + ((ctxt->options & XML_PARSE_NOENT) == 0) && + ((ctxt->options & XML_PARSE_DTDVALID) == 0) && + (ctxt->validate == 0)) + return; + + /* * handle the extra spaces added before and after * c.f. http://www.w3.org/TR/REC-xml#as-PE * this is done independently. @@ -2540,7 +2617,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if (xmlPushInput(ctxt, input) < 0) return; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -2550,6 +2627,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * the amount of data in the buffer. */ GROW + if (ctxt->instate == XML_PARSER_EOF) + return; if ((ctxt->input->end - ctxt->input->cur)>=4) { start[0] = RAW; start[1] = NXT(1); @@ -2580,15 +2659,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { /* * Macro used to grow the current buffer. + * buffer##_size is expected to be a size_t + * mem_error: is expected to handle memory allocation failures */ #define growBuffer(buffer, n) { \ xmlChar *tmp; \ - buffer##_size *= 2; \ - buffer##_size += n; \ - tmp = (xmlChar *) \ - xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + size_t new_size = buffer##_size * 2 + n; \ + if (new_size < buffer##_size) goto mem_error; \ + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ if (tmp == NULL) goto mem_error; \ buffer = tmp; \ + buffer##_size = new_size; \ } /** @@ -2600,7 +2681,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -2614,14 +2695,14 @@ xmlChar * xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; - int buffer_size = 0; + size_t buffer_size = 0; + size_t nbchars = 0; xmlChar *current = NULL; xmlChar *rep = NULL; const xmlChar *last; xmlEntityPtr ent; int c,l; - int nbchars = 0; if ((ctxt == NULL) || (str == NULL) || (len < 0)) return(NULL); @@ -2638,7 +2719,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; - buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); + buffer = (xmlChar *) xmlMallocAtomic(buffer_size); if (buffer == NULL) goto mem_error; /* @@ -2658,7 +2739,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { @@ -2671,12 +2752,12 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) goto int_error; if (ent != NULL) - ctxt->nbentities += ent->checked; + ctxt->nbentities += ent->checked / 2; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else { @@ -2693,9 +2774,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2708,8 +2788,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; - if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); } for (;i > 0;i--) buffer[nbchars++] = *cur++; @@ -2723,7 +2803,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) goto int_error; if (ent != NULL) - ctxt->nbentities += ent->checked; + ctxt->nbentities += ent->checked / 2; if (ent != NULL) { if (ent->content == NULL) { xmlLoadEntityContent(ctxt, ent); @@ -2736,9 +2816,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2750,8 +2829,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons } else { COPY_BUF(l,buffer,nbchars,c); str += l; - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } if (str < last) @@ -2780,7 +2859,7 @@ int_error: * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -3143,7 +3222,7 @@ xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { } else { if ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || - (c == '_') || (c == ':') || + (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) return(1); @@ -3168,6 +3247,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { * Handler for more complex cases */ GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); if ((ctxt->options & XML_PARSE_OLD10) == 0) { /* @@ -3216,9 +3297,11 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { ((c >= 0xFDF0) && (c <= 0xFFFD)) || ((c >= 0x10000) && (c <= 0xEFFFF)) )) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); @@ -3237,18 +3320,32 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || - (c == '_') || (c == ':') || + (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); @@ -3298,6 +3395,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -3333,21 +3435,40 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ (xmlIsNameChar(ctxt, c) && (c != ':'))) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); } /** * xmlParseNCName: * @ctxt: an XML parser context - * @len: lenght of the string parsed + * @len: length of the string parsed * * parse an XML name. * @@ -3385,6 +3506,11 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -3416,6 +3542,8 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const xmlChar *ret; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { @@ -3451,7 +3579,7 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar * * [6] Names ::= Name (#x20 Name)* * - * Returns the Name parsed or NULL. The @str pointer + * Returns the Name parsed or NULL. The @str pointer * is updated to the current location in the string. */ @@ -3495,6 +3623,13 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlCha while (xmlIsNameChar(ctxt, c)) { if (len + 10 > max) { xmlChar *tmp; + + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -3514,6 +3649,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlCha return(buffer); } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } *str = cur; return(xmlStrndup(buf, len)); } @@ -3543,16 +3683,25 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { #endif GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); while (xmlIsNameChar(ctxt, c)) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay @@ -3568,13 +3717,23 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } memcpy(buffer, buf, len); while (xmlIsNameChar(ctxt, c)) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buffer); + return(NULL); + } } if (len + 10 > max) { xmlChar *tmp; + if ((max > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -3595,6 +3754,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } if (len == 0) return(NULL); + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + return(NULL); + } return(xmlStrndup(buf, len)); } @@ -3641,6 +3805,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o ctxt->instate = XML_PARSER_ENTITY_VALUE; input = ctxt->input; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } NEXT; c = CUR_CHAR(l); /* @@ -3648,12 +3816,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o * When a parameter entity reference appears in a literal entity * value, ... a single or double quote character in the replacement * text is always treated as a normal data character and will not - * terminate the literal. + * terminate the literal. * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ - while ((IS_CHAR(c)) && ((c != stop) || /* checked */ - (ctxt->input != input))) { + while (((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { if (len + 5 >= size) { xmlChar *tmp; @@ -3682,6 +3850,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o } } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } /* * Raise problem w.r.t. '&' and '%' being used in non-entities @@ -3729,12 +3901,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **o */ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 0, 0, 0); - if (orig != NULL) + if (orig != NULL) *orig = buf; else xmlFree(buf); } - + return(ret); } @@ -3755,8 +3927,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at xmlChar limit = 0; xmlChar *buf = NULL; xmlChar *rep = NULL; - int len = 0; - int buf_size = 0; + size_t len = 0; + size_t buf_size = 0; int c, l, in_space = 0; xmlChar *current = NULL; xmlEntityPtr ent; @@ -3778,15 +3950,26 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); + buf = (xmlChar *) xmlMallocAtomic(buf_size); if (buf == NULL) goto mem_error; /* * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); - while ((NXT(0) != limit) && /* checked */ - (IS_CHAR(c)) && (c != '<')) { + while (((NXT(0) != limit) && /* checked */ + (IS_CHAR(c)) && (c != '<')) && + (ctxt->instate != XML_PARSER_EOF)) { + /* + * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE + * special option is given + */ + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } if (c == 0) break; if (c == '&') { in_space = 0; @@ -3795,7 +3978,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at if (val == '&') { if (ctxt->replaceEntities) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3804,7 +3987,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c */ - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3814,7 +3997,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at buf[len++] = ';'; } } else if (val != 0) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } len += xmlCopyChar(0, &buf[len], val); @@ -3826,7 +4009,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at ctxt->nbentities += ent->owner; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if ((ctxt->replaceEntities == 0) && @@ -3839,7 +4022,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } else { buf[len++] = ent->content[0]; } - } else if ((ent != NULL) && + } else if ((ent != NULL) && (ctxt->replaceEntities != 0)) { if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { rep = xmlStringDecodeEntities(ctxt, ent->content, @@ -3854,7 +4037,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at current++; } else buf[len++] = *current++; - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3862,7 +4045,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at rep = NULL; } } else { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if (ent->content != NULL) @@ -3877,10 +4060,16 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * entities problems */ if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL)) { + (ent->content != NULL) && (ent->checked == 0)) { + unsigned long oldnbent = ctxt->nbentities; + rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, 0, 0, 0); + + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; if (rep != NULL) { + if (xmlStrchr(rep, '<')) + ent->checked |= 1; xmlFree(rep); rep = NULL; } @@ -3890,7 +4079,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * Just output the reference */ buf[len++] = '&'; - while (len > buf_size - i - 10) { + while (len + i + 10 > buf_size) { growBuffer(buf, i + 10); } for (;i > 0;i--) @@ -3903,7 +4092,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); - while (len > buf_size - 10) { + while (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3912,7 +4101,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } else { in_space = 0; COPY_BUF(l,buf,len,c); - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3921,8 +4110,11 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at GROW; c = CUR_CHAR(l); } + if (ctxt->instate == XML_PARSER_EOF) + goto error; + if ((in_space) && (normalize)) { - while (buf[len - 1] == 0x20) len--; + while ((len > 0) && (buf[len - 1] == 0x20)) len--; } buf[len] = 0; if (RAW == '<') { @@ -3937,11 +4129,23 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } } else NEXT; - if (attlen != NULL) *attlen = len; + + /* + * There we potentially risk an overflow, don't allow attribute value of + * length more than INT_MAX it is a very reasonnable assumption ! + */ + if (len >= INT_MAX) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue length too long\n"); + goto mem_error; + } + + if (attlen != NULL) *attlen = (int) len; return(buf); mem_error: xmlErrMemory(ctxt, NULL); +error: if (buf != NULL) xmlFree(buf); if (rep != NULL) @@ -3962,20 +4166,20 @@ mem_error: * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -3992,7 +4196,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { /** * xmlParseSystemLiteral: * @ctxt: an XML parser context - * + * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") @@ -4021,7 +4225,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); return(NULL); } - + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); if (buf == NULL) { xmlErrMemory(ctxt, NULL); @@ -4033,6 +4237,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); + xmlFree(buf); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -4047,6 +4258,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -4110,6 +4325,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (len + 1 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); + xmlFree(buf); + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -4124,6 +4345,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } NEXT; cur = CUR; @@ -4194,7 +4419,7 @@ static const unsigned char test_char_data[256] = { * The right angle bracket (>) may be represented using the string ">", * and must, for compatibility, be escaped using ">" or a character * reference when it appears in the string "]]>" in content, when that - * string is not marking the end of a CDATA section. + * string is not marking the end of a CDATA section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ @@ -4330,6 +4555,8 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; in = ctxt->input->cur; } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); nbchar = 0; @@ -4359,7 +4586,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cda GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ - (cur != '&') && + (cur != '&') && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { @@ -4398,6 +4625,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cda if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) + return; } NEXTL(l); cur = CUR_CHAR(l); @@ -4490,7 +4719,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **pu } } else { /* - * We handle [83] so we return immediately, if + * We handle [83] so we return immediately, if * "S SystemLiteral" is not detected. From a purely parsing * point of view that's a nice mess. */ @@ -4499,7 +4728,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **pu ptr = CUR_PTR; if (!IS_BLANK_CH(*ptr)) return(NULL); - + while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ if ((*ptr != '\'') && (*ptr != '"')) return(NULL); } @@ -4527,11 +4756,12 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **pu * [15] Comment ::= '' */ static void -xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, + size_t len, size_t size) { int q, ql; int r, rl; int cur, l; - int count = 0; + size_t count = 0; int inputid; inputid = ctxt->input->id; @@ -4577,16 +4807,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar if ((r == '-') && (q == '-')) { xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } if (len + 5 >= size) { xmlChar *new_buf; - size *= 2; - new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size; + + new_size = size * 2; + new_buf = (xmlChar *) xmlRealloc(buf, new_size); if (new_buf == NULL) { xmlFree (buf); xmlErrMemory(ctxt, NULL); return; } buf = new_buf; + size = new_size; } COPY_BUF(ql,buf,len,q); q = r; @@ -4598,6 +4838,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } } NEXTL(l); cur = CUR_CHAR(l); @@ -4647,11 +4891,12 @@ not_terminated: void xmlParseComment(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int size = XML_PARSER_BUFFER_SIZE; - int len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; xmlParserInputState state; const xmlChar *in; - int nbchar = 0, ccol; + size_t nbchar = 0; + int ccol; int inputid; /* @@ -4731,6 +4976,13 @@ get_more: buf[len] = 0; } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } ctxt->input->cur = in; if (*in == 0xA) { in++; @@ -4748,6 +5000,10 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } in = ctxt->input->cur; if (*in == '-') { if (in[1] == '-') { @@ -4766,16 +5022,18 @@ get_more: } if (buf != NULL) xmlFree(buf); - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; return; } - if (buf != NULL) - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n