|
version 1.1, 2012/02/21 23:37:58
|
version 1.1.1.3, 2014/06/15 19:53:28
|
|
Line 44
|
Line 44
|
| #include <libxml/globals.h> |
#include <libxml/globals.h> |
| #include <libxml/uri.h> |
#include <libxml/uri.h> |
| |
|
| |
#include "buf.h" |
| |
#include "enc.h" |
| |
|
| #define HTML_MAX_NAMELEN 1000 |
#define HTML_MAX_NAMELEN 1000 |
| #define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
#define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
| #define HTML_PARSER_BUFFER_SIZE 100 |
#define HTML_PARSER_BUFFER_SIZE 100 |
|
Line 727 static const char* const map_contents[] = { BLOCK, "ar
|
Line 730 static const char* const map_contents[] = { BLOCK, "ar
|
| static const char* const name_attr[] = { "name", NULL } ; |
static const char* const name_attr[] = { "name", NULL } ; |
| static const char* const action_attr[] = { "action", NULL } ; |
static const char* const action_attr[] = { "action", NULL } ; |
| static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
| static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; | static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ; |
| static const char* const content_attr[] = { "content", NULL } ; |
static const char* const content_attr[] = { "content", NULL } ; |
| static const char* const type_attr[] = { "type", NULL } ; |
static const char* const type_attr[] = { "type", NULL } ; |
| static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
|
Line 1080 static const char * const htmlStartClose[] = {
|
Line 1083 static const char * const htmlStartClose[] = {
|
| "menu", "p", "head", "ul", NULL, |
"menu", "p", "head", "ul", NULL, |
| "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
| "div", "p", "head", NULL, |
"div", "p", "head", NULL, |
| "noscript", "p", "head", NULL, | "noscript", "p", NULL, |
| "center", "font", "b", "i", "p", "head", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
| "a", "a", NULL, | "a", "a", "head", NULL, |
| "caption", "p", NULL, |
"caption", "p", NULL, |
| "colgroup", "caption", "colgroup", "col", "p", NULL, |
"colgroup", "caption", "colgroup", "col", "p", NULL, |
| "col", "caption", "col", "p", NULL, |
"col", "caption", "col", "p", NULL, |
|
Line 1100 static const char * const htmlStartClose[] = {
|
Line 1103 static const char * const htmlStartClose[] = {
|
| "option", "option", NULL, |
"option", "option", NULL, |
| "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
| "pre", "listing", "xmp", "a", NULL, |
"pre", "listing", "xmp", "a", NULL, |
| |
/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */ |
| |
"tt", "head", NULL, |
| |
"i", "head", NULL, |
| |
"b", "head", NULL, |
| |
"u", "head", NULL, |
| |
"s", "head", NULL, |
| |
"strike", "head", NULL, |
| |
"big", "head", NULL, |
| |
"small", "head", NULL, |
| |
|
| |
"em", "head", NULL, |
| |
"strong", "head", NULL, |
| |
"dfn", "head", NULL, |
| |
"code", "head", NULL, |
| |
"samp", "head", NULL, |
| |
"kbd", "head", NULL, |
| |
"var", "head", NULL, |
| |
"cite", "head", NULL, |
| |
"abbr", "head", NULL, |
| |
"acronym", "head", NULL, |
| |
|
| |
/* "a" */ |
| |
"img", "head", NULL, |
| |
/* "applet" */ |
| |
/* "embed" */ |
| |
/* "object" */ |
| |
"font", "head", NULL, |
| |
/* "basefont" */ |
| |
"br", "head", NULL, |
| |
/* "script" */ |
| |
"map", "head", NULL, |
| |
"q", "head", NULL, |
| |
"sub", "head", NULL, |
| |
"sup", "head", NULL, |
| |
"span", "head", NULL, |
| |
"bdo", "head", NULL, |
| |
"iframe", "head", NULL, |
| NULL |
NULL |
| }; |
}; |
| |
|
|
Line 2941 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
Line 2981 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
| */ |
*/ |
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| if (areBlanks(ctxt, buf, nbchar)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
| if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
| ctxt->sax->ignorableWhitespace(ctxt->userData, | if (ctxt->sax->characters != NULL) |
| buf, nbchar); | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| | } else { |
| | if (ctxt->sax->ignorableWhitespace != NULL) |
| | ctxt->sax->ignorableWhitespace(ctxt->userData, |
| | buf, nbchar); |
| | } |
| } else { |
} else { |
| htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
| if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
|
Line 2974 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
Line 3019 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
| */ |
*/ |
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| if (areBlanks(ctxt, buf, nbchar)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
| if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
| ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); | if (ctxt->sax->characters != NULL) |
| | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| | } else { |
| | if (ctxt->sax->ignorableWhitespace != NULL) |
| | ctxt->sax->ignorableWhitespace(ctxt->userData, |
| | buf, nbchar); |
| | } |
| } else { |
} else { |
| htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
| if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
|
Line 3435 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **v
|
Line 3486 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **v
|
| } |
} |
| |
|
| /** |
/** |
| * htmlCheckEncoding: | * htmlCheckEncodingDirect: |
| * @ctxt: an HTML parser context |
* @ctxt: an HTML parser context |
| * @attvalue: the attribute value |
* @attvalue: the attribute value |
| * |
* |
| * Checks an http-equiv attribute from a Meta tag to detect | * Checks an attribute value to detect |
| * the encoding |
* the encoding |
| * If a new encoding is detected the parser is switched to decode |
* If a new encoding is detected the parser is switched to decode |
| * it and pass UTF8 |
* it and pass UTF8 |
| */ |
*/ |
| static void |
static void |
| htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { |
| const xmlChar *encoding; | |
| |
|
| if ((ctxt == NULL) || (attvalue == NULL)) | if ((ctxt == NULL) || (encoding == NULL) || |
| | (ctxt->options & HTML_PARSE_IGNORE_ENC)) |
| return; |
return; |
| |
|
| /* do not change encoding */ |
/* do not change encoding */ |
| if (ctxt->input->encoding != NULL) |
if (ctxt->input->encoding != NULL) |
| return; |
return; |
| |
|
| encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); |
|
| if (encoding != NULL) { |
if (encoding != NULL) { |
| encoding += 8; |
|
| } else { |
|
| encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); |
|
| if (encoding != NULL) |
|
| encoding += 9; |
|
| } |
|
| if (encoding != NULL) { |
|
| xmlCharEncoding enc; |
xmlCharEncoding enc; |
| xmlCharEncodingHandlerPtr handler; |
xmlCharEncodingHandlerPtr handler; |
| |
|
|
Line 3500 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
Line 3543 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
| xmlSwitchToEncoding(ctxt, handler); |
xmlSwitchToEncoding(ctxt, handler); |
| ctxt->charset = XML_CHAR_ENCODING_UTF8; |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
| } else { |
} else { |
| ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| | "htmlCheckEncoding: unknown encoding %s\n", |
| | encoding, NULL); |
| } |
} |
| } |
} |
| |
|
|
Line 3515 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
Line 3560 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
| * convert as much as possible to the parser reading buffer. |
* convert as much as possible to the parser reading buffer. |
| */ |
*/ |
| processed = ctxt->input->cur - ctxt->input->base; |
processed = ctxt->input->cur - ctxt->input->base; |
| xmlBufferShrink(ctxt->input->buf->buffer, processed); | xmlBufShrink(ctxt->input->buf->buffer, processed); |
| nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | nbchars = xmlCharEncInput(ctxt->input->buf, 1); |
| ctxt->input->buf->buffer, | |
| ctxt->input->buf->raw); | |
| if (nbchars < 0) { |
if (nbchars < 0) { |
| htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| "htmlCheckEncoding: encoder error\n", |
"htmlCheckEncoding: encoder error\n", |
| NULL, NULL); |
NULL, NULL); |
| } |
} |
| ctxt->input->base = | xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); |
| ctxt->input->cur = ctxt->input->buf->buffer->content; | |
| ctxt->input->end = | |
| &ctxt->input->base[ctxt->input->buf->buffer->use]; | |
| } |
} |
| } |
} |
| } |
} |
| |
|
| /** |
/** |
| |
* htmlCheckEncoding: |
| |
* @ctxt: an HTML parser context |
| |
* @attvalue: the attribute value |
| |
* |
| |
* Checks an http-equiv attribute from a Meta tag to detect |
| |
* the encoding |
| |
* If a new encoding is detected the parser is switched to decode |
| |
* it and pass UTF8 |
| |
*/ |
| |
static void |
| |
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
| |
const xmlChar *encoding; |
| |
|
| |
if (!attvalue) |
| |
return; |
| |
|
| |
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset"); |
| |
if (encoding != NULL) { |
| |
encoding += 7; |
| |
} |
| |
/* |
| |
* skip blank |
| |
*/ |
| |
if (encoding && IS_BLANK_CH(*encoding)) |
| |
encoding = xmlStrcasestr(attvalue, BAD_CAST"="); |
| |
if (encoding && *encoding == '=') { |
| |
encoding ++; |
| |
htmlCheckEncodingDirect(ctxt, encoding); |
| |
} |
| |
} |
| |
|
| |
/** |
| * htmlCheckMeta: |
* htmlCheckMeta: |
| * @ctxt: an HTML parser context |
* @ctxt: an HTML parser context |
| * @atts: the attributes values |
* @atts: the attributes values |
|
Line 3556 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **
|
Line 3628 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **
|
| if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
| && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
| http = 1; |
http = 1; |
| |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) |
| |
htmlCheckEncodingDirect(ctxt, value); |
| else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
| content = value; |
content = value; |
| att = atts[i++]; |
att = atts[i++]; |
|
Line 3885 htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
Line 3959 htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
| if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| ctxt->sax->endElement(ctxt->userData, name); |
ctxt->sax->endElement(ctxt->userData, name); |
| |
htmlNodeInfoPop(ctxt); |
| htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
| ret = 1; |
ret = 1; |
| } else { |
} else { |
|
Line 4877 htmlCreateMemoryParserCtxt(const char *buffer, int siz
|
Line 4952 htmlCreateMemoryParserCtxt(const char *buffer, int siz
|
| |
|
| input->filename = NULL; |
input->filename = NULL; |
| input->buf = buf; |
input->buf = buf; |
| input->base = input->buf->buffer->content; | xmlBufResetInput(buf->buffer, input); |
| input->cur = input->buf->buffer->content; | |
| input->end = &input->buf->buffer->content[input->buf->buffer->use]; | |
| |
|
| inputPush(ctxt, input); |
inputPush(ctxt, input); |
| return(ctxt); |
return(ctxt); |
|
Line 4996 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
Line 5069 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
| buf = in->base; |
buf = in->base; |
| len = in->length; |
len = in->length; |
| } else { |
} else { |
| buf = in->buf->buffer->content; | buf = xmlBufContent(in->buf->buffer); |
| len = in->buf->buffer->use; | len = xmlBufUse(in->buf->buffer); |
| } |
} |
| |
|
| /* take into account the sequence length */ |
/* take into account the sequence length */ |
|
Line 5089 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
Line 5162 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
| * @stop: Array of chars, which stop the lookup. |
* @stop: Array of chars, which stop the lookup. |
| * @stopLen: Length of stop-Array |
* @stopLen: Length of stop-Array |
| * |
* |
| * Try to find if any char of the stop-Array is available in the input | * Try to find if any char of the stop-Array is available in the input |
| * stream. |
* stream. |
| * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
* This function has a side effect of (possibly) incrementing ctxt->checkIndex |
| * to avoid rescanning sequences of bytes, it DOES change the state of the |
* to avoid rescanning sequences of bytes, it DOES change the state of the |
| * parser, do not use liberally. |
* parser, do not use liberally. |
| * |
* |
| * Returns the index to the current parsing point if a stopChar | * Returns the index to the current parsing point if a stopChar |
| * is available, -1 otherwise. |
* is available, -1 otherwise. |
| */ |
*/ |
| static int |
static int |
|
Line 5123 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xml
|
Line 5196 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xml
|
| buf = in->base; |
buf = in->base; |
| len = in->length; |
len = in->length; |
| } else { |
} else { |
| buf = in->buf->buffer->content; | buf = xmlBufContent(in->buf->buffer); |
| len = in->buf->buffer->use; | len = xmlBufUse(in->buf->buffer); |
| } |
} |
| |
|
| for (; base < len; base++) { |
for (; base < len; base++) { |
|
Line 5173 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5246 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| int avail = 0; |
int avail = 0; |
| xmlChar cur, next; |
xmlChar cur, next; |
| |
|
| |
htmlParserNodeInfo node_info; |
| |
|
| #ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
| switch (ctxt->instate) { |
switch (ctxt->instate) { |
| case XML_PARSER_EOF: |
case XML_PARSER_EOF: |
|
Line 5233 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5308 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| if (in->buf == NULL) |
if (in->buf == NULL) |
| avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
| else |
else |
| avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| if ((avail == 0) && (terminate)) { |
if ((avail == 0) && (terminate)) { |
| htmlAutoCloseOnEnd(ctxt); |
htmlAutoCloseOnEnd(ctxt); |
| if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
|
Line 5269 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5344 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| if (in->buf == NULL) |
if (in->buf == NULL) |
| avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
| else |
else |
| avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| } |
} |
| if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
| ctxt->sax->setDocumentLocator(ctxt->userData, |
ctxt->sax->setDocumentLocator(ctxt->userData, |
|
Line 5311 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5386 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| if (in->buf == NULL) |
if (in->buf == NULL) |
| avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
| else |
else |
| avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| if (avail < 2) | /* |
| | * no chars in buffer |
| | */ |
| | if (avail < 1) |
| goto done; |
goto done; |
| |
/* |
| |
* not enough chars in buffer
| |
*/ |
| |
if (avail < 2) { |
| |
if (!terminate) |
| |
goto done; |
| |
else |
| |
next = ' '; |
| |
} else { |
| |
next = in->cur[1]; |
| |
} |
| cur = in->cur[0]; |
cur = in->cur[0]; |
| next = in->cur[1]; |
|
| if ((cur == '<') && (next == '!') && |
if ((cur == '<') && (next == '!') && |
| (in->cur[2] == '-') && (in->cur[3] == '-')) { |
(in->cur[2] == '-') && (in->cur[3] == '-')) { |
| if ((!terminate) && |
if ((!terminate) && |
|
Line 5371 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5459 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| if (in->buf == NULL) |
if (in->buf == NULL) |
| avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
| else |
else |
| avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| if (avail < 2) |
if (avail < 2) |
| goto done; |
goto done; |
| cur = in->cur[0]; |
cur = in->cur[0]; |
|
Line 5412 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5500 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| if (in->buf == NULL) |
if (in->buf == NULL) |
| avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
| else |
else |
| avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| if (avail < 1) |
if (avail < 1) |
| goto done; |
goto done; |
| cur = in->cur[0]; |
cur = in->cur[0]; |
|
Line 5465 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5553 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| int failed; |
int failed; |
| const htmlElemDesc * info; |
const htmlElemDesc * info; |
| |
|
| if (avail < 2) | /* |
| | * no chars in buffer |
| | */ |
| | if (avail < 1) |
| goto done; |
goto done; |
| |
/* |
| |
* not enough chars in buffer
| |
*/ |
| |
if (avail < 2) { |
| |
if (!terminate) |
| |
goto done; |
| |
else |
| |
next = ' '; |
| |
} else { |
| |
next = in->cur[1]; |
| |
} |
| cur = in->cur[0]; |
cur = in->cur[0]; |
| if (cur != '<') { |
if (cur != '<') { |
| ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
|
Line 5476 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5578 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| #endif |
#endif |
| break; |
break; |
| } |
} |
| if (in->cur[1] == '/') { | if (next == '/') { |
| ctxt->instate = XML_PARSER_END_TAG; |
ctxt->instate = XML_PARSER_END_TAG; |
| ctxt->checkIndex = 0; |
ctxt->checkIndex = 0; |
| #ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
|
Line 5489 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5591 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| goto done; |
goto done; |
| |
|
| |
/* Capture start position */ |
| |
if (ctxt->record_info) { |
| |
node_info.begin_pos = ctxt->input->consumed + |
| |
(CUR_PTR - ctxt->input->base); |
| |
node_info.begin_line = ctxt->input->line; |
| |
} |
| |
|
| |
|
| failed = htmlParseStartTag(ctxt); |
failed = htmlParseStartTag(ctxt); |
| name = ctxt->name; |
name = ctxt->name; |
| if ((failed == -1) || |
if ((failed == -1) || |
|
Line 5538 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5648 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
| } |
} |
| |
|
| |
if (ctxt->record_info) |
| |
htmlNodeInfoPush(ctxt, &node_info); |
| |
|
| ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
| #ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
| xmlGenericError(xmlGenericErrorContext, |
xmlGenericError(xmlGenericErrorContext, |
|
Line 5554 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5667 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| ctxt->sax->endElement(ctxt->userData, name); |
ctxt->sax->endElement(ctxt->userData, name); |
| htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
| } |
} |
| |
|
| |
if (ctxt->record_info) |
| |
htmlNodeInfoPush(ctxt, &node_info); |
| |
|
| ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
| #ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
| xmlGenericError(xmlGenericErrorContext, |
xmlGenericError(xmlGenericErrorContext, |
|
Line 5581 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5698 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
| if ((cur != '<') && (cur != '&')) { |
if ((cur != '<') && (cur != '&')) { |
| if (ctxt->sax != NULL) { |
if (ctxt->sax != NULL) { |
| if (IS_BLANK_CH(cur)) { |
if (IS_BLANK_CH(cur)) { |
| if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
| ctxt->sax->ignorableWhitespace( | if (ctxt->sax->characters != NULL) |
| ctxt->userData, &cur, 1); | ctxt->sax->characters( |
| | ctxt->userData, &cur, 1); |
| | } else { |
| | if (ctxt->sax->ignorableWhitespace != NULL) |
| | ctxt->sax->ignorableWhitespace( |
| | ctxt->userData, &cur, 1); |
| | } |
| } else { |
} else { |
| htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
| if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
|
Line 5906 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6029 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
| } |
} |
| if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
| (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
| int base = ctxt->input->base - ctxt->input->buf->buffer->content; | size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
| int cur = ctxt->input->cur - ctxt->input->base; | size_t cur = ctxt->input->cur - ctxt->input->base; |
| int res; |
int res; |
| |
|
| res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
|
Line 5916 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6039 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
| ctxt->disableSAX = 1; |
ctxt->disableSAX = 1; |
| return (XML_PARSER_EOF); |
return (XML_PARSER_EOF); |
| } |
} |
| ctxt->input->base = ctxt->input->buf->buffer->content + base; | xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
| ctxt->input->cur = ctxt->input->base + cur; | |
| ctxt->input->end = | |
| &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
| #ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
| xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
| #endif |
#endif |
|
Line 5934 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6054 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
| if ((in->encoder != NULL) && (in->buffer != NULL) && |
if ((in->encoder != NULL) && (in->buffer != NULL) && |
| (in->raw != NULL)) { |
(in->raw != NULL)) { |
| int nbchars; |
int nbchars; |
| |
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); |
| |
size_t current = ctxt->input->cur - ctxt->input->base; |
| |
|
| nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | nbchars = xmlCharEncInput(in, terminate); |
| if (nbchars < 0) { |
if (nbchars < 0) { |
| htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| "encoder error\n", NULL, NULL); |
"encoder error\n", NULL, NULL); |
| return(XML_ERR_INVALID_ENCODING); |
return(XML_ERR_INVALID_ENCODING); |
| } |
} |
| |
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); |
| } |
} |
| } |
} |
| } |
} |
|
Line 6034 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *
|
Line 6157 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *
|
| inputStream->filename = (char *) |
inputStream->filename = (char *) |
| xmlCanonicPath((const xmlChar *) filename); |
xmlCanonicPath((const xmlChar *) filename); |
| inputStream->buf = buf; |
inputStream->buf = buf; |
| inputStream->base = inputStream->buf->buffer->content; | xmlBufResetInput(buf->buffer, inputStream); |
| inputStream->cur = inputStream->buf->buffer->content; | |
| inputStream->end = | |
| &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | |
| |
|
| inputPush(ctxt, inputStream); |
inputPush(ctxt, inputStream); |
| |
|
| if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
| (ctxt->input->buf != NULL)) { |
(ctxt->input->buf != NULL)) { |
| int base = ctxt->input->base - ctxt->input->buf->buffer->content; | size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
| int cur = ctxt->input->cur - ctxt->input->base; | size_t cur = ctxt->input->cur - ctxt->input->base; |
| |
|
| xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
| |
|
| ctxt->input->base = ctxt->input->buf->buffer->content + base; | xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
| ctxt->input->cur = ctxt->input->base + cur; | |
| ctxt->input->end = | |
| &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
| #ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
| xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
| #endif |
#endif |
|
Line 6537 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options
|
Line 6654 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options
|
| ctxt->options |= HTML_PARSE_NODEFDTD; |
ctxt->options |= HTML_PARSE_NODEFDTD; |
| options -= HTML_PARSE_NODEFDTD; |
options -= HTML_PARSE_NODEFDTD; |
| } |
} |
| |
if (options & HTML_PARSE_IGNORE_ENC) { |
| |
ctxt->options |= HTML_PARSE_IGNORE_ENC; |
| |
options -= HTML_PARSE_IGNORE_ENC; |
| |
} |
| |
if (options & HTML_PARSE_NOIMPLIED) { |
| |
ctxt->options |= HTML_PARSE_NOIMPLIED; |
| |
options -= HTML_PARSE_NOIMPLIED; |
| |
} |
| ctxt->dictNames = 0; |
ctxt->dictNames = 0; |
| return (options); |
return (options); |
| } |
} |
|
Line 6730 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseC
|
Line 6855 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseC
|
| |
|
| input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
| XML_CHAR_ENCODING_NONE); |
XML_CHAR_ENCODING_NONE); |
| if (input == NULL) | if (input == NULL) { |
| | if (ioclose != NULL) |
| | ioclose(ioctx); |
| return (NULL); |
return (NULL); |
| |
} |
| ctxt = htmlNewParserCtxt(); |
ctxt = htmlNewParserCtxt(); |
| if (ctxt == NULL) { |
if (ctxt == NULL) { |
| xmlFreeParserInputBuffer(input); |
xmlFreeParserInputBuffer(input); |
|
Line 6930 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCal
|
Line 7058 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCal
|
| |
|
| input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
| XML_CHAR_ENCODING_NONE); |
XML_CHAR_ENCODING_NONE); |
| if (input == NULL) | if (input == NULL) { |
| | if (ioclose != NULL) |
| | ioclose(ioctx); |
| return (NULL); |
return (NULL); |
| |
} |
| stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
| if (stream == NULL) { |
if (stream == NULL) { |
| xmlFreeParserInputBuffer(input); |
xmlFreeParserInputBuffer(input); |