version 1.1.1.2, 2013/07/22 01:22:19
|
version 1.1.1.3, 2014/06/15 19:53:28
|
Line 44
|
Line 44
|
#include <libxml/globals.h> |
#include <libxml/globals.h> |
#include <libxml/uri.h> |
#include <libxml/uri.h> |
|
|
|
#include "buf.h" |
|
#include "enc.h" |
|
|
#define HTML_MAX_NAMELEN 1000 |
#define HTML_MAX_NAMELEN 1000 |
#define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
#define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
#define HTML_PARSER_BUFFER_SIZE 100 |
#define HTML_PARSER_BUFFER_SIZE 100 |
Line 1082 static const char * const htmlStartClose[] = {
|
Line 1085 static const char * const htmlStartClose[] = {
|
"div", "p", "head", NULL, |
"div", "p", "head", NULL, |
"noscript", "p", NULL, |
"noscript", "p", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
"a", "a", NULL, | "a", "a", "head", NULL, |
"caption", "p", NULL, |
"caption", "p", NULL, |
"colgroup", "caption", "colgroup", "col", "p", NULL, |
"colgroup", "caption", "colgroup", "col", "p", NULL, |
"col", "caption", "col", "p", NULL, |
"col", "caption", "col", "p", NULL, |
Line 1100 static const char * const htmlStartClose[] = {
|
Line 1103 static const char * const htmlStartClose[] = {
|
"option", "option", NULL, |
"option", "option", NULL, |
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
"pre", "listing", "xmp", "a", NULL, |
"pre", "listing", "xmp", "a", NULL, |
|
/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */ |
|
"tt", "head", NULL, |
|
"i", "head", NULL, |
|
"b", "head", NULL, |
|
"u", "head", NULL, |
|
"s", "head", NULL, |
|
"strike", "head", NULL, |
|
"big", "head", NULL, |
|
"small", "head", NULL, |
|
|
|
"em", "head", NULL, |
|
"strong", "head", NULL, |
|
"dfn", "head", NULL, |
|
"code", "head", NULL, |
|
"samp", "head", NULL, |
|
"kbd", "head", NULL, |
|
"var", "head", NULL, |
|
"cite", "head", NULL, |
|
"abbr", "head", NULL, |
|
"acronym", "head", NULL, |
|
|
|
/* "a" */ |
|
"img", "head", NULL, |
|
/* "applet" */ |
|
/* "embed" */ |
|
/* "object" */ |
|
"font", "head", NULL, |
|
/* "basefont" */ |
|
"br", "head", NULL, |
|
/* "script" */ |
|
"map", "head", NULL, |
|
"q", "head", NULL, |
|
"sub", "head", NULL, |
|
"sup", "head", NULL, |
|
"span", "head", NULL, |
|
"bdo", "head", NULL, |
|
"iframe", "head", NULL, |
NULL |
NULL |
}; |
}; |
|
|
Line 2941 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
Line 2981 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
*/ |
*/ |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
ctxt->sax->ignorableWhitespace(ctxt->userData, | if (ctxt->sax->characters != NULL) |
buf, nbchar); | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| } else { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, |
| buf, nbchar); |
| } |
} else { |
} else { |
htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
Line 2974 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
Line 3019 htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
*/ |
*/ |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); | if (ctxt->sax->characters != NULL) |
| ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| } else { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, |
| buf, nbchar); |
| } |
} else { |
} else { |
htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
Line 3509 htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const
|
Line 3560 htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const
|
* convert as much as possible to the parser reading buffer. |
* convert as much as possible to the parser reading buffer. |
*/ |
*/ |
processed = ctxt->input->cur - ctxt->input->base; |
processed = ctxt->input->cur - ctxt->input->base; |
xmlBufferShrink(ctxt->input->buf->buffer, processed); | xmlBufShrink(ctxt->input->buf->buffer, processed); |
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | nbchars = xmlCharEncInput(ctxt->input->buf, 1); |
ctxt->input->buf->buffer, | |
ctxt->input->buf->raw); | |
if (nbchars < 0) { |
if (nbchars < 0) { |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
"htmlCheckEncoding: encoder error\n", |
"htmlCheckEncoding: encoder error\n", |
NULL, NULL); |
NULL, NULL); |
} |
} |
ctxt->input->base = | xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); |
ctxt->input->cur = ctxt->input->buf->buffer->content; | |
ctxt->input->end = | |
&ctxt->input->base[ctxt->input->buf->buffer->use]; | |
} |
} |
} |
} |
} |
} |
Line 4906 htmlCreateMemoryParserCtxt(const char *buffer, int siz
|
Line 4952 htmlCreateMemoryParserCtxt(const char *buffer, int siz
|
|
|
input->filename = NULL; |
input->filename = NULL; |
input->buf = buf; |
input->buf = buf; |
input->base = input->buf->buffer->content; | xmlBufResetInput(buf->buffer, input); |
input->cur = input->buf->buffer->content; | |
input->end = &input->buf->buffer->content[input->buf->buffer->use]; | |
|
|
inputPush(ctxt, input); |
inputPush(ctxt, input); |
return(ctxt); |
return(ctxt); |
Line 5025 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
Line 5069 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
buf = in->base; |
buf = in->base; |
len = in->length; |
len = in->length; |
} else { |
} else { |
buf = in->buf->buffer->content; | buf = xmlBufContent(in->buf->buffer); |
len = in->buf->buffer->use; | len = xmlBufUse(in->buf->buffer); |
} |
} |
|
|
/* take into account the sequence length */ |
/* take into account the sequence length */ |
Line 5118 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
Line 5162 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlCha
|
* @stop: Array of chars, which stop the lookup. |
* @stop: Array of chars, which stop the lookup. |
* @stopLen: Length of stop-Array |
* @stopLen: Length of stop-Array |
* |
* |
* Try to find if any char of the stop-Array is available in the input | * Try to find if any char of the stop-Array is available in the input |
* stream. |
* stream. |
* This function has a side effect of (possibly) incrementing ctxt->checkIndex |
* This function has a side effect of (possibly) incrementing ctxt->checkIndex |
* to avoid rescanning sequences of bytes, it DOES change the state of the |
* to avoid rescanning sequences of bytes, it DOES change the state of the |
* parser, do not use liberally. |
* parser, do not use liberally. |
* |
* |
* Returns the index to the current parsing point if a stopChar | * Returns the index to the current parsing point if a stopChar |
* is available, -1 otherwise. |
* is available, -1 otherwise. |
*/ |
*/ |
static int |
static int |
Line 5152 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xml
|
Line 5196 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xml
|
buf = in->base; |
buf = in->base; |
len = in->length; |
len = in->length; |
} else { |
} else { |
buf = in->buf->buffer->content; | buf = xmlBufContent(in->buf->buffer); |
len = in->buf->buffer->use; | len = xmlBufUse(in->buf->buffer); |
} |
} |
|
|
for (; base < len; base++) { |
for (; base < len; base++) { |
Line 5264 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5308 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if ((avail == 0) && (terminate)) { |
if ((avail == 0) && (terminate)) { |
htmlAutoCloseOnEnd(ctxt); |
htmlAutoCloseOnEnd(ctxt); |
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
Line 5300 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5344 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
} |
} |
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
ctxt->sax->setDocumentLocator(ctxt->userData, |
ctxt->sax->setDocumentLocator(ctxt->userData, |
Line 5342 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5386 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
/* |
/* |
* no chars in buffer |
* no chars in buffer |
*/ |
*/ |
Line 5415 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5459 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 2) |
if (avail < 2) |
goto done; |
goto done; |
cur = in->cur[0]; |
cur = in->cur[0]; |
Line 5456 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5500 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if (in->buf == NULL) |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); | avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 1) |
if (avail < 1) |
goto done; |
goto done; |
cur = in->cur[0]; |
cur = in->cur[0]; |
Line 5654 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5698 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
if ((cur != '<') && (cur != '&')) { |
if ((cur != '<') && (cur != '&')) { |
if (ctxt->sax != NULL) { |
if (ctxt->sax != NULL) { |
if (IS_BLANK_CH(cur)) { |
if (IS_BLANK_CH(cur)) { |
if (ctxt->sax->ignorableWhitespace != NULL) | if (ctxt->keepBlanks) { |
ctxt->sax->ignorableWhitespace( | if (ctxt->sax->characters != NULL) |
ctxt->userData, &cur, 1); | ctxt->sax->characters( |
| ctxt->userData, &cur, 1); |
| } else { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace( |
| ctxt->userData, &cur, 1); |
| } |
} else { |
} else { |
htmlCheckParagraph(ctxt); |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
if (ctxt->sax->characters != NULL) |
Line 5979 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6029 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
} |
} |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
int base = ctxt->input->base - ctxt->input->buf->buffer->content; | size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
int cur = ctxt->input->cur - ctxt->input->base; | size_t cur = ctxt->input->cur - ctxt->input->base; |
int res; |
int res; |
|
|
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
Line 5989 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6039 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
ctxt->disableSAX = 1; |
ctxt->disableSAX = 1; |
return (XML_PARSER_EOF); |
return (XML_PARSER_EOF); |
} |
} |
ctxt->input->base = ctxt->input->buf->buffer->content + base; | xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
ctxt->input->cur = ctxt->input->base + cur; | |
ctxt->input->end = | |
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
#endif |
#endif |
Line 6007 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
Line 6054 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chu
|
if ((in->encoder != NULL) && (in->buffer != NULL) && |
if ((in->encoder != NULL) && (in->buffer != NULL) && |
(in->raw != NULL)) { |
(in->raw != NULL)) { |
int nbchars; |
int nbchars; |
|
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); |
|
size_t current = ctxt->input->cur - ctxt->input->base; |
|
|
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | nbchars = xmlCharEncInput(in, terminate); |
if (nbchars < 0) { |
if (nbchars < 0) { |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
"encoder error\n", NULL, NULL); |
"encoder error\n", NULL, NULL); |
return(XML_ERR_INVALID_ENCODING); |
return(XML_ERR_INVALID_ENCODING); |
} |
} |
|
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); |
} |
} |
} |
} |
} |
} |
Line 6107 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *
|
Line 6157 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *
|
inputStream->filename = (char *) |
inputStream->filename = (char *) |
xmlCanonicPath((const xmlChar *) filename); |
xmlCanonicPath((const xmlChar *) filename); |
inputStream->buf = buf; |
inputStream->buf = buf; |
inputStream->base = inputStream->buf->buffer->content; | xmlBufResetInput(buf->buffer, inputStream); |
inputStream->cur = inputStream->buf->buffer->content; | |
inputStream->end = | |
&inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | |
|
|
inputPush(ctxt, inputStream); |
inputPush(ctxt, inputStream); |
|
|
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL)) { |
(ctxt->input->buf != NULL)) { |
int base = ctxt->input->base - ctxt->input->buf->buffer->content; | size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
int cur = ctxt->input->cur - ctxt->input->base; | size_t cur = ctxt->input->cur - ctxt->input->base; |
|
|
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
|
|
ctxt->input->base = ctxt->input->buf->buffer->content + base; | xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
ctxt->input->cur = ctxt->input->base + cur; | |
ctxt->input->end = | |
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
#endif |
#endif |