version 1.1.1.1, 2012/02/21 23:37:58
|
version 1.1.1.3, 2014/06/15 19:53:28
|
Line 44
|
Line 44
|
#include <libxml/globals.h> |
#include <libxml/globals.h> |
#include <libxml/uri.h> |
#include <libxml/uri.h> |
|
|
|
#include "buf.h" |
|
#include "enc.h" |
|
|
#define HTML_MAX_NAMELEN 1000 |
#define HTML_MAX_NAMELEN 1000 |
#define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
#define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
#define HTML_PARSER_BUFFER_SIZE 100 |
#define HTML_PARSER_BUFFER_SIZE 100 |
Line 727 static const char* const map_contents[] = { BLOCK, "ar
|
Line 730 static const char* const map_contents[] = { BLOCK, "ar
|
static const char* const name_attr[] = { "name", NULL } ; |
static const char* const name_attr[] = { "name", NULL } ; |
static const char* const action_attr[] = { "action", NULL } ; |
static const char* const action_attr[] = { "action", NULL } ; |
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; | static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ; |
static const char* const content_attr[] = { "content", NULL } ; |
static const char* const content_attr[] = { "content", NULL } ; |
static const char* const type_attr[] = { "type", NULL } ; |
static const char* const type_attr[] = { "type", NULL } ; |
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
Line 1080 static const char * const htmlStartClose[] = {
|
Line 1083 static const char * const htmlStartClose[] = {
|
"menu", "p", "head", "ul", NULL, |
"menu", "p", "head", "ul", NULL, |
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
"div", "p", "head", NULL, |
"div", "p", "head", NULL, |
"noscript", "p", "head", NULL, | "noscript", "p", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
"a", "a", NULL, | "a", "a", "head", NULL, |
"caption", "p", NULL, |
"caption", "p", NULL, |
"colgroup", "caption", "colgroup", "col", "p", NULL, |
"colgroup", "caption", "colgroup", "col", "p", NULL, |
"col", "caption", "col", "p", NULL, |
"col", "caption", "col", "p", NULL, |
Line 1100 static const char * const htmlStartClose[] = {
|
Line 1103 static const char * const htmlStartClose[] = {
|
"option", "option", NULL, |
"option", "option", NULL, |
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
"pre", "listing", "xmp", "a", NULL, |
"pre", "listing", "xmp", "a", NULL, |
|
/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */
|
"tt", "head", NULL, |
|
"i", "head", NULL, |
|
"b", "head", NULL, |
|
"u", "head", NULL, |
|
"s", "head", NULL, |
|
"strike", "head", NULL, |
|
"big", "head", NULL, |
|
"small", "head", NULL, |
|
|
|
"em", "head", NULL, |
|
"strong", "head", NULL, |
|
"dfn", "head", NULL, |
|
"code", "head", NULL, |
|
"samp", "head", NULL, |
|
"kbd", "head", NULL, |
|
"var", "head", NULL, |
|
"cite", "head", NULL, |
|
"abbr", "head", NULL, |
|
"acronym", "head", NULL, |
|
|
|
/* "a" */ |
|
"img", "head", NULL, |
|
/* "applet" */ |
|
/* "embed" */ |
|
/* "object" */ |
|
"font", "head", NULL, |
|
/* "basefont" */ |
|
"br", "head", NULL, |
|
/* "script" */ |
|
"map", "head", NULL, |
|
"q", "head", NULL, |
|
"sub", "head", NULL, |
|
"sup", "head", NULL, |
|
"span", "head", NULL, |
|
"bdo", "head", NULL, |
|
"iframe", "head", NULL, |
NULL |
NULL |
}; |
}; |
|
|
Line 2941 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
Line 2981 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
*/ |
*/ |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
ctxt->sax->ignorableWhitespace(ctxt->userData, | if (ctxt->sax->characters != NULL) |
buf, nbchar); | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| } else { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, |
| buf, nbchar); |
| } |
} else { |
} else { |
htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
Line 2974 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
Line 3019 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
*/ |
*/ |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); | if (ctxt->sax->characters != NULL) |
| ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| } else { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, |
| buf, nbchar); |
| } |
} else { |
} else { |
htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
Line 3435 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **v
|
Line 3486 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **v
|
} |
} |
|
|
/** |
/** |
* htmlCheckEncoding: | * htmlCheckEncodingDirect: |
* @ctxt: an HTML parser context |
* @ctxt: an HTML parser context |
* @attvalue: the attribute value |
* @attvalue: the attribute value |
* |
* |
* Checks an http-equiv attribute from a Meta tag to detect | * Checks an attribute value to detect |
* the encoding |
* the encoding |
* If a new encoding is detected the parser is switched to decode |
* If a new encoding is detected the parser is switched to decode |
* it and pass UTF8 |
* it and pass UTF8 |
*/ |
*/ |
static void |
static void |
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { |
const xmlChar *encoding; | |
|
|
if ((ctxt == NULL) || (attvalue == NULL)) | if ((ctxt == NULL) || (encoding == NULL) || |
| (ctxt->options & HTML_PARSE_IGNORE_ENC)) |
return; |
return; |
|
|
/* do not change encoding */ |
/* do not change encoding */ |
if (ctxt->input->encoding != NULL) |
if (ctxt->input->encoding != NULL) |
return; |
return; |
|
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); |
|
if (encoding != NULL) { |
if (encoding != NULL) { |
encoding += 8; |
|
} else { |
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); |
|
if (encoding != NULL) |
|
encoding += 9; |
|
} |
|
if (encoding != NULL) { |
|
xmlCharEncoding enc; |
xmlCharEncoding enc; |
xmlCharEncodingHandlerPtr handler; |
xmlCharEncodingHandlerPtr handler; |
|
|
Line 3500 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
Line 3543 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
xmlSwitchToEncoding(ctxt, handler); |
xmlSwitchToEncoding(ctxt, handler); |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
} else { |
} else { |
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| "htmlCheckEncoding: unknown encoding %s\n", |
| encoding, NULL); |
} |
} |
} |
} |
|
|
Line 3515 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
Line 3560 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
* convert as much as possible to the parser reading buffer. |
* convert as much as possible to the parser reading buffer. |
*/ |
*/ |
processed = ctxt->input->cur - ctxt->input->base; |
processed = ctxt->input->cur - ctxt->input->base; |
xmlBufferShrink(ctxt->input->buf->buffer, processed); | xmlBufShrink(ctxt->input->buf->buffer, processed); |
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | nbchars = xmlCharEncInput(ctxt->input->buf, 1); |
ctxt->input->buf->buffer, | |
ctxt->input->buf->raw); | |
if (nbchars < 0) { |
if (nbchars < 0) { |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
"htmlCheckEncoding: encoder error\n", |
"htmlCheckEncoding: encoder error\n", |
NULL, NULL); |
NULL, NULL); |
} |
} |
ctxt->input->base = | xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); |
ctxt->input->cur = ctxt->input->buf->buffer->content; | |
ctxt->input->end = | |
&ctxt->input->base[ctxt->input->buf->buffer->use]; | |
} |
} |
} |
} |
} |
} |
|
|
/** |
/** |
|
* htmlCheckEncoding: |
|
* @ctxt: an HTML parser context |
|
* @attvalue: the attribute value |
|
* |
|
* Checks an http-equiv attribute from a Meta tag to detect |
|
* the encoding |
|
* If a new encoding is detected the parser is switched to decode |
|
* it and pass UTF8 |
|
*/ |
|
static void |
|
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
|
const xmlChar *encoding; |
|
|
|
if (!attvalue) |
|
return; |
|
|
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset"); |
|
if (encoding != NULL) { |
|
encoding += 7; |
|
} |
|
/* |
|
* skip blank |
|
*/ |
|
if (encoding && IS_BLANK_CH(*encoding)) |
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"="); |
|
if (encoding && *encoding == '=') { |
|
encoding ++; |
|
htmlCheckEncodingDirect(ctxt, encoding); |
|
} |
|
} |
|
|
|
/** |
* htmlCheckMeta: |
* htmlCheckMeta: |
* @ctxt: an HTML parser context |
* @ctxt: an HTML parser context |
* @atts: the attributes values |
* @atts: the attributes values |
Line 3556 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **
|
Line 3628 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **
|
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
http = 1; |
http = 1; |
|
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) |
|
htmlCheckEncodingDirect(ctxt, value); |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
content = value; |
content = value; |
att = atts[i++]; |
att = atts[i++]; |
Line 3885 htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
Line 3959 htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
ctxt->sax->endElement(ctxt->userData, name); |
ctxt->sax->endElement(ctxt->userData, name); |
|
htmlNodeInfoPop(ctxt); |
htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
ret = 1; |
ret = 1; |
} else { |
} else { |
Line 4877 htmlCreateMemoryParserCtxt(const char *buffer, int siz
|
Line 4952 htmlCreateMemoryParserCtxt(const char *buffer, int siz
|
|
|
input->filename = NULL; |
input->filename = NULL; |
input->buf = buf; |
input->buf = buf; |
input->base = input->buf->buffer->content; | xmlBufResetInput(buf->buffer, input); |
input->cur = input->buf->buffer->content; | |
input->end = &input->buf->buffer->content[input->buf->buffer->use]; | |
|
|
inputPush(ctxt, input); |
inputPush(ctxt, input); |
return(ctxt); |
return(ctxt); |
Line 4996 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
Line 5069 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
buf = in->base; |
buf = in->base; |
len = in->length; |
len = in->length; |
} else { |
} else { |
buf = in->buf->buffer->content; | buf = xmlBufContent(in->buf->buffer); |
len = in->buf->buffer->use; | len = xmlBufUse(in->buf->buffer); |
} |
} |
|
|
/* take into account the sequence length */ |
/* take into account the sequence length */ |
Line 5089 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
Line 5162 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
* @stop: Array of chars, which stop the lookup. |
* @stop: Array of chars, which stop the lookup. |
* @stopLen: Length of stop-Array |
* @stopLen: Length of stop-Array |
* |
* |
* Try to find if any char of the stop-Array is available in the input | * Try to find if any char of the stop-Array is available in the input |
* stream. |
* stream. |
* This function has a side effect of (possibly) incrementing ctxt->checkIndex |
* This function has a side effect of (possibly) incrementing ctxt->checkIndex |
* to avoid rescanning sequences of bytes, it DOES change the state of the |
* to avoid rescanning sequences of bytes, it DOES change the state of the |
* parser, do not use liberally. |
* parser, do not use liberally. |
* |
* |
* Returns the index to the current parsing point if a stopChar | * Returns the index to the current parsing point if a stopChar |
* is available, -1 otherwise. |
* is available, -1 otherwise. |
*/ |
*/ |
static int |
static int |
Line 5123 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xml
|
Line 5196 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xml
|
buf = in->base; |
buf = in->base; |
len = in->length; |
len = in->length; |
} else { |
} else { |
buf = in->buf->buffer->content; | buf = xmlBufContent(in->buf->buffer); |
len = in->buf->buffer->use; | len = xmlBufUse(in->buf->buffer); |
} |
} |
|
|
for (; base < len; base++) { |
for (; base < len; base++) { |
Line 5173 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5246 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
int avail = 0; |
int avail = 0; |
xmlChar cur, next; |
xmlChar cur, next; |
|
|
|
htmlParserNodeInfo node_info; |
|
|
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
switch (ctxt->instate) { |
switch (ctxt->instate) { |
case XML_PARSER_EOF: |
case XML_PARSER_EOF: |
Line 5233 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5308 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if ((avail == 0) && (terminate)) { |
if ((avail == 0) && (terminate)) { |
htmlAutoCloseOnEnd(ctxt); |
htmlAutoCloseOnEnd(ctxt); |
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
Line 5269 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5344 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
} |
} |
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
ctxt->sax->setDocumentLocator(ctxt->userData, |
ctxt->sax->setDocumentLocator(ctxt->userData, |
Line 5311 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5386 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 2) | /* |
| * no chars in buffer |
| */ |
| if (avail < 1) |
goto done; |
goto done; |
|
/* |
|
* not enough chars in buffer
|
*/ |
|
if (avail < 2) { |
|
if (!terminate) |
|
goto done; |
|
else |
|
next = ' '; |
|
} else { |
|
next = in->cur[1]; |
|
} |
cur = in->cur[0]; |
cur = in->cur[0]; |
next = in->cur[1]; |
|
if ((cur == '<') && (next == '!') && |
if ((cur == '<') && (next == '!') && |
(in->cur[2] == '-') && (in->cur[3] == '-')) { |
(in->cur[2] == '-') && (in->cur[3] == '-')) { |
if ((!terminate) && |
if ((!terminate) && |
Line 5371 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5459 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 2) |
if (avail < 2) |
goto done; |
goto done; |
cur = in->cur[0]; |
cur = in->cur[0]; |
Line 5412 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5500 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 1) |
if (avail < 1) |
goto done; |
goto done; |
cur = in->cur[0]; |
cur = in->cur[0]; |
Line 5465 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5553 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
int failed; |
int failed; |
const htmlElemDesc * info; |
const htmlElemDesc * info; |
|
|
if (avail < 2) | /* |
| * no chars in buffer |
| */ |
| if (avail < 1) |
goto done; |
goto done; |
|
/* |
|
* not enough chars in buffer
|
*/ |
|
if (avail < 2) { |
|
if (!terminate) |
|
goto done; |
|
else |
|
next = ' '; |
|
} else { |
|
next = in->cur[1]; |
|
} |
cur = in->cur[0]; |
cur = in->cur[0]; |
if (cur != '<') { |
if (cur != '<') { |
ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
Line 5476 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5578 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
#endif |
#endif |
break; |
break; |
} |
} |
if (in->cur[1] == '/') { | if (next == '/') { |
ctxt->instate = XML_PARSER_END_TAG; |
ctxt->instate = XML_PARSER_END_TAG; |
ctxt->checkIndex = 0; |
ctxt->checkIndex = 0; |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
Line 5489 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5591 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
goto done; |
goto done; |
|
|
|
/* Capture start position */ |
|
if (ctxt->record_info) { |
|
node_info.begin_pos = ctxt->input->consumed + |
|
(CUR_PTR - ctxt->input->base); |
|
node_info.begin_line = ctxt->input->line; |
|
} |
|
|
|
|
failed = htmlParseStartTag(ctxt); |
failed = htmlParseStartTag(ctxt); |
name = ctxt->name; |
name = ctxt->name; |
if ((failed == -1) || |
if ((failed == -1) || |
Line 5538 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5648 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
} |
} |
|
|
|
if (ctxt->record_info) |
|
htmlNodeInfoPush(ctxt, &node_info); |
|
|
ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, |
xmlGenericError(xmlGenericErrorContext, |
Line 5554 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5667 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
ctxt->sax->endElement(ctxt->userData, name); |
ctxt->sax->endElement(ctxt->userData, name); |
htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
} |
} |
|
|
|
if (ctxt->record_info) |
|
htmlNodeInfoPush(ctxt, &node_info); |
|
|
ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, |
xmlGenericError(xmlGenericErrorContext, |
Line 5581 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5698 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if ((cur != '<') && (cur != '&')) { |
if ((cur != '<') && (cur != '&')) { |
if (ctxt->sax != NULL) { |
if (ctxt->sax != NULL) { |
if (IS_BLANK_CH(cur)) { |
if (IS_BLANK_CH(cur)) { |
if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
ctxt->sax->ignorableWhitespace( | if (ctxt->sax->characters != NULL) |
ctxt->userData, &cur, 1); | ctxt->sax->characters( |
| ctxt->userData, &cur, 1); |
| } else { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace( |
| ctxt->userData, &cur, 1); |
| } |
} else { |
} else { |
htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
Line 5906 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6029 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
} |
} |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
int base = ctxt->input->base - ctxt->input->buf->buffer->content; | size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
int cur = ctxt->input->cur - ctxt->input->base; | size_t cur = ctxt->input->cur - ctxt->input->base; |
int res; |
int res; |
|
|
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
Line 5916 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6039 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
ctxt->disableSAX = 1; |
ctxt->disableSAX = 1; |
return (XML_PARSER_EOF); |
return (XML_PARSER_EOF); |
} |
} |
ctxt->input->base = ctxt->input->buf->buffer->content + base; | xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
ctxt->input->cur = ctxt->input->base + cur; | |
ctxt->input->end = | |
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
#endif |
#endif |
Line 5934 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6054 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
if ((in->encoder != NULL) && (in->buffer != NULL) && |
if ((in->encoder != NULL) && (in->buffer != NULL) && |
(in->raw != NULL)) { |
(in->raw != NULL)) { |
int nbchars; |
int nbchars; |
|
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); |
|
size_t current = ctxt->input->cur - ctxt->input->base; |
|
|
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | nbchars = xmlCharEncInput(in, terminate); |
if (nbchars < 0) { |
if (nbchars < 0) { |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
"encoder error\n", NULL, NULL); |
"encoder error\n", NULL, NULL); |
return(XML_ERR_INVALID_ENCODING); |
return(XML_ERR_INVALID_ENCODING); |
} |
} |
|
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); |
} |
} |
} |
} |
} |
} |
Line 6034 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *
|
Line 6157 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *
|
inputStream->filename = (char *) |
inputStream->filename = (char *) |
xmlCanonicPath((const xmlChar *) filename); |
xmlCanonicPath((const xmlChar *) filename); |
inputStream->buf = buf; |
inputStream->buf = buf; |
inputStream->base = inputStream->buf->buffer->content; | xmlBufResetInput(buf->buffer, inputStream); |
inputStream->cur = inputStream->buf->buffer->content; | |
inputStream->end = | |
&inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | |
|
|
inputPush(ctxt, inputStream); |
inputPush(ctxt, inputStream); |
|
|
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL)) { |
(ctxt->input->buf != NULL)) { |
int base = ctxt->input->base - ctxt->input->buf->buffer->content; | size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
int cur = ctxt->input->cur - ctxt->input->base; | size_t cur = ctxt->input->cur - ctxt->input->base; |
|
|
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
|
|
ctxt->input->base = ctxt->input->buf->buffer->content + base; | xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
ctxt->input->cur = ctxt->input->base + cur; | |
ctxt->input->end = | |
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
#endif |
#endif |
Line 6537 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options
|
Line 6654 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options
|
ctxt->options |= HTML_PARSE_NODEFDTD; |
ctxt->options |= HTML_PARSE_NODEFDTD; |
options -= HTML_PARSE_NODEFDTD; |
options -= HTML_PARSE_NODEFDTD; |
} |
} |
|
if (options & HTML_PARSE_IGNORE_ENC) { |
|
ctxt->options |= HTML_PARSE_IGNORE_ENC; |
|
options -= HTML_PARSE_IGNORE_ENC; |
|
} |
|
if (options & HTML_PARSE_NOIMPLIED) { |
|
ctxt->options |= HTML_PARSE_NOIMPLIED; |
|
options -= HTML_PARSE_NOIMPLIED; |
|
} |
ctxt->dictNames = 0; |
ctxt->dictNames = 0; |
return (options); |
return (options); |
} |
} |
Line 6730 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseC
|
Line 6855 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseC
|
|
|
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
XML_CHAR_ENCODING_NONE); |
XML_CHAR_ENCODING_NONE); |
if (input == NULL) | if (input == NULL) { |
| if (ioclose != NULL) |
| ioclose(ioctx); |
return (NULL); |
return (NULL); |
|
} |
ctxt = htmlNewParserCtxt(); |
ctxt = htmlNewParserCtxt(); |
if (ctxt == NULL) { |
if (ctxt == NULL) { |
xmlFreeParserInputBuffer(input); |
xmlFreeParserInputBuffer(input); |
Line 6930 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCal
|
Line 7058 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCal
|
|
|
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
XML_CHAR_ENCODING_NONE); |
XML_CHAR_ENCODING_NONE); |
if (input == NULL) | if (input == NULL) { |
| if (ioclose != NULL) |
| ioclose(ioctx); |
return (NULL); |
return (NULL); |
|
} |
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
if (stream == NULL) { |
if (stream == NULL) { |
xmlFreeParserInputBuffer(input); |
xmlFreeParserInputBuffer(input); |