--- embedaddon/libxml2/parser.c 2012/02/21 23:37:57 1.1.1.1 +++ embedaddon/libxml2/parser.c 2013/07/22 01:28:50 1.1.1.2.2.1 @@ -40,6 +40,7 @@ #endif #include <stdlib.h> +#include <limits.h> #include <string.h> #include <stdarg.h> #include <libxml/xmlmemory.h> @@ -79,6 +80,9 @@ #ifdef HAVE_ZLIB_H #include <zlib.h> #endif +#ifdef HAVE_LZMA_H +#include <lzma.h> +#endif static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); @@ -114,17 +118,34 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, * parser option. */ static int -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, - xmlEntityPtr ent) +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, + xmlEntityPtr ent, size_t replacement) { - unsigned long consumed = 0; + size_t consumed = 0; if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) return (0); if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) return (1); - if (size != 0) { + if (replacement != 0) { + if (replacement < XML_MAX_TEXT_LENGTH) + return(0); + /* + * If the volume of entity copy reaches 10 times the + * amount of parsed data and over the large text threshold + * then that's very likely to be an abuse. + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if (replacement < XML_PARSER_NON_LINEAR * consumed) + return(0); + } else if (size != 0) { + /* * Do the check based on the replacement size of the entity */ if (size < XML_PARSER_BIG_ENTITY) @@ -169,7 +190,6 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned l */ return (0); } - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return (1); } @@ -197,6 +217,7 @@ unsigned int xmlParserMaxDepth = 256; static const char *xmlW3CPIs[] = { "xml-stylesheet", + "xml-model", NULL }; @@ -731,7 +752,7 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, * @info1: extra information string * @info2: extra information string * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints + * Handle a namespace warning error */ static void xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, @@ -954,6 +975,12 @@ xmlHasFeature(xmlFeature feature) #else return(0); #endif + case XML_WITH_LZMA: +#ifdef LIBXML_LZMA_ENABLED + return(1); +#else + return(0); +#endif case XML_WITH_ICU: #ifdef LIBXML_ICU_ENABLED return(1); @@ -1819,15 +1846,14 @@ namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) if (ctxt->nameNr >= ctxt->nameMax) { const xmlChar * *tmp; - ctxt->nameMax *= 2; tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, - ctxt->nameMax * + ctxt->nameMax * 2 * sizeof(ctxt->nameTab[0])); if (tmp == NULL) { - ctxt->nameMax /= 2; goto mem_error; } ctxt->nameTab = tmp; + ctxt->nameMax *= 2; } ctxt->nameTab[ctxt->nameNr] = value; ctxt->name = value; @@ -2580,15 +2606,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { /* * Macro used to grow the current buffer. + * buffer##_size is expected to be a size_t + * mem_error: is expected to handle memory allocation failures */ #define growBuffer(buffer, n) { \ xmlChar *tmp; \ - buffer##_size *= 2; \ - buffer##_size += n; \ - tmp = (xmlChar *) \ - xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + size_t new_size = buffer##_size * 2 + n; \ + if (new_size < buffer##_size) goto mem_error; \ + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ if (tmp == NULL) goto mem_error; \ buffer = tmp; \ + buffer##_size = new_size; \ } /** @@ -2614,14 +2642,14 @@ xmlChar * xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; - int buffer_size = 0; + size_t buffer_size = 0; + size_t nbchars = 0; xmlChar *current = NULL; xmlChar *rep = NULL; const xmlChar *last; xmlEntityPtr ent; int c,l; - int nbchars = 0; if ((ctxt == NULL) || (str == NULL) || (len < 0)) return(NULL); @@ -2638,7 +2666,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; - buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); + buffer = (xmlChar *) xmlMallocAtomic(buffer_size); if (buffer == NULL) goto mem_error; /* @@ -2658,7 +2686,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { @@ -2676,7 +2704,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else { @@ -2693,9 +2721,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2708,8 +2735,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; - if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); } for (;i > 0;i--) buffer[nbchars++] = *cur++; @@ -2736,9 +2763,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2750,8 +2776,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, cons } else { COPY_BUF(l,buffer,nbchars,c); str += l; - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } if (str < last) @@ -3755,8 +3781,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at xmlChar limit = 0; xmlChar *buf = NULL; xmlChar *rep = NULL; - int len = 0; - int buf_size = 0; + size_t len = 0; + size_t buf_size = 0; int c, l, in_space = 0; xmlChar *current = NULL; xmlEntityPtr ent; @@ -3778,7 +3804,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); + buf = (xmlChar *) xmlMallocAtomic(buf_size); if (buf == NULL) goto mem_error; /* @@ -3795,7 +3821,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at if (val == '&') { if (ctxt->replaceEntities) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3804,7 +3830,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c */ - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3814,7 +3840,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at buf[len++] = ';'; } } else if (val != 0) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } len += xmlCopyChar(0, &buf[len], val); @@ -3826,7 +3852,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at ctxt->nbentities += ent->owner; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if ((ctxt->replaceEntities == 0) && @@ -3854,7 +3880,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at current++; } else buf[len++] = *current++; - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3862,7 +3888,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at rep = NULL; } } else { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if (ent->content != NULL) @@ -3890,7 +3916,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at * Just output the reference */ buf[len++] = '&'; - while (len > buf_size - i - 10) { + while (len + i + 10 > buf_size) { growBuffer(buf, i + 10); } for (;i > 0;i--) @@ -3903,7 +3929,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); - while (len > buf_size - 10) { + while (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3912,7 +3938,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } else { in_space = 0; COPY_BUF(l,buf,len,c); - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3922,7 +3948,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at c = CUR_CHAR(l); } if ((in_space) && (normalize)) { - while (buf[len - 1] == 0x20) len--; + while ((len > 0) && (buf[len - 1] == 0x20)) len--; } buf[len] = 0; if (RAW == '<') { @@ -3937,7 +3963,18 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *at } } else NEXT; - if (attlen != NULL) *attlen = len; + + /* + * There we potentially risk an overflow, don't allow attribute value of + * lenght more than INT_MAX it is a very reasonnable assumption ! + */ + if (len >= INT_MAX) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + goto mem_error; + } + + if (attlen != NULL) *attlen = (int) len; return(buf); mem_error: @@ -4769,13 +4806,14 @@ get_more: ctxt->instate = state; return; } - if (buf != NULL) - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n<!--%.50s\n", + if (buf != NULL) { + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, + "Double hyphen within comment: " + "<!--%.50s\n", buf); - else - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n", NULL); + } else + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, + "Double hyphen within comment\n", NULL); in++; ctxt->input->col++; } @@ -4949,7 +4987,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { (ctxt->sax->processingInstruction != NULL)) ctxt->sax->processingInstruction(ctxt->userData, target, NULL); - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; return; } buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); @@ -5029,7 +5068,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { } else { xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); } - ctxt->instate = state; + if (ctxt->instate != XML_PARSER_EOF) + ctxt->instate = state; } } @@ -6952,7 +6992,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlFreeNodeList(list); return; } - if (xmlParserEntityCheck(ctxt, 0, ent)) { + if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { xmlFreeNodeList(list); return; } @@ -6992,6 +7032,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ent->owner = 1; while (list != NULL) { list->parent = (xmlNodePtr) ent; + xmlSetTreeDoc(list, ent->doc); if (list->next == NULL) ent->last = list; list = list->next; @@ -7111,6 +7152,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlNodePtr nw = NULL, cur, firstChild = NULL; /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + + /* * when operating on a reader, the entities definitions * are always owning the entities subtree. if (ctxt->parseMode == XML_PARSE_READER) @@ -7150,7 +7198,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { } else if (list == NULL) { xmlNodePtr nw = NULL, cur, next, last, firstChild = NULL; + /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + + /* * Copy the entity child list and make it the new * entity child list. The goal is to make sure any * ID or REF referenced will be the one from the @@ -9552,7 +9608,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { const xmlChar *prefix = NULL; const xmlChar *URI = NULL; xmlParserNodeInfo node_info; - int line, tlen; + int line, tlen = 0; xmlNodePtr ret; int nsNr = ctxt->nsNr; @@ -9588,6 +9644,8 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { else name = xmlParseStartTag(ctxt); #endif /* LIBXML_SAX1_ENABLED */ + if (ctxt->instate == XML_PARSER_EOF) + return; if (name == NULL) { spacePop(ctxt); return; @@ -9921,6 +9979,13 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { } else { xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); } + + /* + * Non standard parsing, allowing the user to ignore encoding + */ + if (ctxt->options & XML_PARSE_IGNORE_ENC) + return(encoding); + /* * UTF-16 encoding stwich has already taken place at this stage, * more over the little-endian/big-endian selection is already done @@ -10967,6 +11032,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina else name = xmlParseStartTag(ctxt); #endif /* LIBXML_SAX1_ENABLED */ + if (ctxt->instate == XML_PARSER_EOF) + goto done; if (name == NULL) { spacePop(ctxt); ctxt->instate = XML_PARSER_EOF; @@ -11153,7 +11220,9 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int termina else xmlParseEndTag1(ctxt, 0); #endif /* LIBXML_SAX1_ENABLED */ - if (ctxt->nameNr == 0) { + if (ctxt->instate == XML_PARSER_EOF) { + /* Nothing */ + } else if (ctxt->nameNr == 0) { ctxt->instate = XML_PARSER_EPILOG; } else { ctxt->instate = XML_PARSER_CONTENT; @@ -11958,11 +12027,15 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user xmlParserCtxtPtr ctxt; xmlParserInputPtr inputStream; xmlParserInputBufferPtr buf; - + if (ioread == NULL) return(NULL); buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); - if (buf == NULL) return(NULL); + if (buf == NULL) { + if (ioclose != NULL) + ioclose(ioctx); + return (NULL); + } ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { @@ -11987,7 +12060,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); if (user_data != NULL) ctxt->userData = user_data; - } + } inputStream = xmlNewIOInputStream(ctxt, buf, enc); if (inputStream == NULL) { @@ -12404,6 +12477,16 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const } /* + * If the user provided its own SAX callbacks then reuse the + * useData callback field, otherwise the expected setup in a + * DOM builder is to have userData == ctxt + */ + if (ctx->userData == ctx) + ctxt->userData = ctxt; + else + ctxt->userData = ctx->userData; + + /* * Doing validity checking on chunk doesn't make sense */ ctxt->instate = XML_PARSER_CONTENT; @@ -14138,6 +14221,7 @@ xmlInitParser(void) { (xmlGenericError == NULL)) initGenericErrorDefaultFunc(NULL); xmlInitMemory(); + xmlInitializeDict(); xmlInitCharEncodingHandlers(); xmlDefaultSAXHandlerInit(); xmlRegisterDefaultInputCallbacks(); @@ -14302,6 +14386,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->catalogs = NULL; ctxt->nbentities = 0; ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); if (ctxt->attsDefault != NULL) { @@ -14560,6 +14645,10 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int o ctxt->options |= XML_PARSE_OLDSAX; options -= XML_PARSE_OLDSAX; } + if (options & XML_PARSE_IGNORE_ENC) { + ctxt->options |= XML_PARSE_IGNORE_ENC; + options -= XML_PARSE_IGNORE_ENC; + } ctxt->linenumbers = 1; return (options); } @@ -14747,7 +14836,7 @@ xmlReadFd(int fd, const char *URL, const char *encodin * @options: a combination of xmlParserOption * * parse an XML document from I/O functions and source and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14763,8 +14852,11 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCa input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, XML_CHAR_ENCODING_NONE); - if (input == NULL) + if (input == NULL) { + if (ioclose != NULL) + ioclose(ioctx); return (NULL); + } ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { xmlFreeParserInputBuffer(input); @@ -14790,7 +14882,7 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCa * * parse an XML in-memory document and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14945,7 +15037,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, * * parse an XML document from I/O functions and source and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14966,8 +15058,11 @@ xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallb input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, XML_CHAR_ENCODING_NONE); - if (input == NULL) + if (input == NULL) { + if (ioclose != NULL) + ioclose(ioctx); return (NULL); + } stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input);