--- embedaddon/libxml2/HTMLtree.c 2013/07/22 01:22:20 1.1.1.2 +++ embedaddon/libxml2/HTMLtree.c 2014/06/15 19:53:29 1.1.1.3 @@ -30,16 +30,18 @@ #include #include +#include "buf.h" + /************************************************************************ * * - * Getting/Setting encoding meta tags * + * Getting/Setting encoding meta tags * * * ************************************************************************/ /** * htmlGetMetaEncoding: * @doc: the document - * + * * Encoding definition lookup in the Meta tags * * Returns the current encoding as flagged in the HTML source @@ -126,17 +128,17 @@ found_meta: found_content: encoding = xmlStrstr(content, BAD_CAST"charset="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET="); if (encoding != NULL) { encoding += 8; } else { encoding = xmlStrstr(content, BAD_CAST"charset ="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset ="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); if (encoding != NULL) encoding += 9; @@ -314,7 +316,7 @@ static const char* htmlBooleanAttrs[] = { * @name: the name of the attribute to check * * Determine if a given attribute is a boolean attribute. - * + * * returns: false if the attribute is not boolean, true otherwise. */ int @@ -338,7 +340,7 @@ xmlOutputBufferPtr xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); /************************************************************************ * * - * Output error handlers * + * Output error handlers * * * ************************************************************************/ /** @@ -387,17 +389,13 @@ htmlSaveErr(int code, xmlNodePtr node, const char *ext /************************************************************************ * * - * Dumping HTML tree content to a simple buffer * + * Dumping HTML tree content to a simple buffer * * * ************************************************************************/ -static int -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, - int format); - /** - * htmlNodeDumpFormat: - * @buf: the HTML buffer output + * htmlBufNodeDumpFormat: + * @buf: the xmlBufPtr output * @doc: the document * @cur: the current node * @format: should formatting spaces been added @@ -406,10 +404,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xm * * Returns the number of byte written or -1 in case of error */ -static int -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, +static size_t +htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) { - unsigned int use; + size_t use; int ret; xmlOutputBufferPtr outbuf; @@ -432,10 +430,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xm outbuf->context = NULL; outbuf->written = 0; - use = buf->use; + use = xmlBufUse(buf); htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); xmlFree(outbuf); - ret = buf->use - use; + ret = xmlBufUse(buf) - use; return (ret); } @@ -452,9 +450,24 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xm */ int htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufPtr buffer; + size_t ret; + + if ((buf == NULL) || (cur == NULL)) + return(-1); + xmlInitParser(); + buffer = xmlBufFromBuffer(buf); + if (buffer == NULL) + return(-1); - return(htmlNodeDumpFormat(buf, doc, cur, 1)); + ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); + + xmlBufBackToBuffer(buffer); + + if (ret > INT_MAX) + return(-1); + return((int) ret); } /** @@ -499,7 +512,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFile(out, handler); @@ -595,11 +608,11 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, i xmlOutputBufferFlush(buf); if (buf->conv != NULL) { - *size = buf->conv->use; - *mem = xmlStrndup(buf->conv->content, *size); + *size = xmlBufUse(buf->conv); + *mem = xmlStrndup(xmlBufContent(buf->conv), *size); } else { - *size = buf->buffer->use; - *mem = xmlStrndup(buf->buffer->content, *size); + *size = xmlBufUse(buf->buffer); + *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); } (void)xmlOutputBufferClose(buf); } @@ -621,7 +634,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *si /************************************************************************ * * - * Dumping HTML tree content to an I/O output buffer * + * Dumping HTML tree content to an I/O output buffer * * * ************************************************************************/ @@ -632,7 +645,7 @@ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNs * @buf: the HTML buffer output * @doc: the document * @encoding: the encoding string - * + * * TODO: check whether encoding is needed * * Dump the HTML document DTD, if any. @@ -650,14 +663,14 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr do xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->ExternalID != NULL) { xmlOutputBufferWriteString(buf, " PUBLIC "); - xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); + xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " "); - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); - } + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); + } } else if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " SYSTEM "); - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); } xmlOutputBufferWriteString(buf, ">\n"); } @@ -677,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr d xmlChar *value; /* - * TODO: The html output method should not escape a & character - * occurring in an attribute value immediately followed by - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * The html output method should not escape a & character + * occurring in an attribute value immediately followed by + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * This is implemented in xmlEncodeEntitiesReentrant */ if (cur == NULL) { @@ -707,15 +721,19 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr d while (IS_BLANK_CH(*tmp)) tmp++; - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); + /* + * the < and > have already been escaped at the entity level + * And doing so here breaks server side includes + */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>"); if (escaped != NULL) { - xmlBufferWriteQuotedString(buf->buffer, escaped); + xmlBufWriteQuotedString(buf->buffer, escaped); xmlFree(escaped); } else { - xmlBufferWriteQuotedString(buf->buffer, value); + xmlBufWriteQuotedString(buf->buffer, value); } } else { - xmlBufferWriteQuotedString(buf->buffer, value); + xmlBufWriteQuotedString(buf->buffer, value); } xmlFree(value); } else { @@ -1105,7 +1123,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { if ((cur == NULL) || (filename == NULL)) return(-1); - + xmlInitParser(); encoding = (const char *) htmlGetMetaEncoding(cur); @@ -1136,7 +1154,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); @@ -1156,7 +1174,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { * @encoding: the document encoding * * Dump an HTML document to a file using a given encoding. - * + * * returns: the number of byte written or -1 in case of failure. */ int @@ -1200,7 +1218,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, 0); @@ -1220,7 +1238,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur * * Dump an HTML document to a file using a given encoding * and formatting returns/spaces are added. - * + * * returns: the number of byte written or -1 in case of failure. */ int