version 1.1, 2012/02/21 23:37:58
|
version 1.1.1.2, 2013/07/22 01:22:19
|
Line 727 static const char* const map_contents[] = { BLOCK, "ar
|
Line 727 static const char* const map_contents[] = { BLOCK, "ar
|
static const char* const name_attr[] = { "name", NULL } ; |
static const char* const name_attr[] = { "name", NULL } ; |
static const char* const action_attr[] = { "action", NULL } ; |
static const char* const action_attr[] = { "action", NULL } ; |
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; | static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ; |
static const char* const content_attr[] = { "content", NULL } ; |
static const char* const content_attr[] = { "content", NULL } ; |
static const char* const type_attr[] = { "type", NULL } ; |
static const char* const type_attr[] = { "type", NULL } ; |
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
Line 1080 static const char * const htmlStartClose[] = {
|
Line 1080 static const char * const htmlStartClose[] = {
|
"menu", "p", "head", "ul", NULL, |
"menu", "p", "head", "ul", NULL, |
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
"div", "p", "head", NULL, |
"div", "p", "head", NULL, |
"noscript", "p", "head", NULL, | "noscript", "p", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
"a", "a", NULL, |
"a", "a", NULL, |
"caption", "p", NULL, |
"caption", "p", NULL, |
Line 3435 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **v
|
Line 3435 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **v
|
} |
} |
|
|
/** |
/** |
* htmlCheckEncoding: | * htmlCheckEncodingDirect: |
* @ctxt: an HTML parser context |
* @ctxt: an HTML parser context |
* @attvalue: the attribute value |
* @attvalue: the attribute value |
* |
* |
* Checks an http-equiv attribute from a Meta tag to detect | * Checks an attribute value to detect |
* the encoding |
* the encoding |
* If a new encoding is detected the parser is switched to decode |
* If a new encoding is detected the parser is switched to decode |
* it and pass UTF8 |
* it and pass UTF8 |
*/ |
*/ |
static void |
static void |
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { |
const xmlChar *encoding; | |
|
|
if ((ctxt == NULL) || (attvalue == NULL)) | if ((ctxt == NULL) || (encoding == NULL) || |
| (ctxt->options & HTML_PARSE_IGNORE_ENC)) |
return; |
return; |
|
|
/* do not change encoding */ |
/* do not change encoding */ |
if (ctxt->input->encoding != NULL) |
if (ctxt->input->encoding != NULL) |
return; |
return; |
|
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); |
|
if (encoding != NULL) { |
if (encoding != NULL) { |
encoding += 8; |
|
} else { |
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); |
|
if (encoding != NULL) |
|
encoding += 9; |
|
} |
|
if (encoding != NULL) { |
|
xmlCharEncoding enc; |
xmlCharEncoding enc; |
xmlCharEncodingHandlerPtr handler; |
xmlCharEncodingHandlerPtr handler; |
|
|
Line 3500 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
Line 3492 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
xmlSwitchToEncoding(ctxt, handler); |
xmlSwitchToEncoding(ctxt, handler); |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
} else { |
} else { |
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| "htmlCheckEncoding: unknown encoding %s\n", |
| encoding, NULL); |
} |
} |
} |
} |
|
|
Line 3533 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
Line 3527 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlCha
|
} |
} |
|
|
/** |
/** |
|
* htmlCheckEncoding: |
|
* @ctxt: an HTML parser context |
|
* @attvalue: the attribute value |
|
* |
|
* Checks an http-equiv attribute from a Meta tag to detect |
|
* the encoding |
|
* If a new encoding is detected the parser is switched to decode |
|
* it and pass UTF8 |
|
*/ |
|
static void |
|
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
|
const xmlChar *encoding; |
|
|
|
if (!attvalue) |
|
return; |
|
|
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset"); |
|
if (encoding != NULL) { |
|
encoding += 7; |
|
} |
|
/* |
|
* skip blank |
|
*/ |
|
if (encoding && IS_BLANK_CH(*encoding)) |
|
encoding = xmlStrcasestr(attvalue, BAD_CAST"="); |
|
if (encoding && *encoding == '=') { |
|
encoding ++; |
|
htmlCheckEncodingDirect(ctxt, encoding); |
|
} |
|
} |
|
|
|
/** |
* htmlCheckMeta: |
* htmlCheckMeta: |
* @ctxt: an HTML parser context |
* @ctxt: an HTML parser context |
* @atts: the attributes values |
* @atts: the attributes values |
Line 3556 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **
|
Line 3582 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **
|
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
http = 1; |
http = 1; |
|
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) |
|
htmlCheckEncodingDirect(ctxt, value); |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
content = value; |
content = value; |
att = atts[i++]; |
att = atts[i++]; |
Line 3885 htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
Line 3913 htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
ctxt->sax->endElement(ctxt->userData, name); |
ctxt->sax->endElement(ctxt->userData, name); |
|
htmlNodeInfoPop(ctxt); |
htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
ret = 1; |
ret = 1; |
} else { |
} else { |
Line 5173 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5202 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
int avail = 0; |
int avail = 0; |
xmlChar cur, next; |
xmlChar cur, next; |
|
|
|
htmlParserNodeInfo node_info; |
|
|
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
switch (ctxt->instate) { |
switch (ctxt->instate) { |
case XML_PARSER_EOF: |
case XML_PARSER_EOF: |
Line 5312 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5343 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
avail = in->length - (in->cur - in->base); |
avail = in->length - (in->cur - in->base); |
else |
else |
avail = in->buf->buffer->use - (in->cur - in->base); |
avail = in->buf->buffer->use - (in->cur - in->base); |
if (avail < 2) | /* |
| * no chars in buffer |
| */ |
| if (avail < 1) |
goto done; |
goto done; |
|
/* |
|
* not enough chars in buffer
|
*/ |
|
if (avail < 2) { |
|
if (!terminate) |
|
goto done; |
|
else |
|
next = ' '; |
|
} else { |
|
next = in->cur[1]; |
|
} |
cur = in->cur[0]; |
cur = in->cur[0]; |
next = in->cur[1]; |
|
if ((cur == '<') && (next == '!') && |
if ((cur == '<') && (next == '!') && |
(in->cur[2] == '-') && (in->cur[3] == '-')) { |
(in->cur[2] == '-') && (in->cur[3] == '-')) { |
if ((!terminate) && |
if ((!terminate) && |
Line 5465 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5509 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
int failed; |
int failed; |
const htmlElemDesc * info; |
const htmlElemDesc * info; |
|
|
if (avail < 2) | /* |
| * no chars in buffer |
| */ |
| if (avail < 1) |
goto done; |
goto done; |
|
/* |
|
* not enough chars in buffer
|
*/ |
|
if (avail < 2) { |
|
if (!terminate) |
|
goto done; |
|
else |
|
next = ' '; |
|
} else { |
|
next = in->cur[1]; |
|
} |
cur = in->cur[0]; |
cur = in->cur[0]; |
if (cur != '<') { |
if (cur != '<') { |
ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
Line 5476 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5534 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
#endif |
#endif |
break; |
break; |
} |
} |
if (in->cur[1] == '/') { | if (next == '/') { |
ctxt->instate = XML_PARSER_END_TAG; |
ctxt->instate = XML_PARSER_END_TAG; |
ctxt->checkIndex = 0; |
ctxt->checkIndex = 0; |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
Line 5489 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5547 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
goto done; |
goto done; |
|
|
|
/* Capture start position */ |
|
if (ctxt->record_info) { |
|
node_info.begin_pos = ctxt->input->consumed + |
|
(CUR_PTR - ctxt->input->base); |
|
node_info.begin_line = ctxt->input->line; |
|
} |
|
|
|
|
failed = htmlParseStartTag(ctxt); |
failed = htmlParseStartTag(ctxt); |
name = ctxt->name; |
name = ctxt->name; |
if ((failed == -1) || |
if ((failed == -1) || |
Line 5538 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5604 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
} |
} |
|
|
|
if (ctxt->record_info) |
|
htmlNodeInfoPush(ctxt, &node_info); |
|
|
ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, |
xmlGenericError(xmlGenericErrorContext, |
Line 5554 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
Line 5623 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int termi
|
ctxt->sax->endElement(ctxt->userData, name); |
ctxt->sax->endElement(ctxt->userData, name); |
htmlnamePop(ctxt); |
htmlnamePop(ctxt); |
} |
} |
|
|
|
if (ctxt->record_info) |
|
htmlNodeInfoPush(ctxt, &node_info); |
|
|
ctxt->instate = XML_PARSER_CONTENT; |
ctxt->instate = XML_PARSER_CONTENT; |
#ifdef DEBUG_PUSH |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, |
xmlGenericError(xmlGenericErrorContext, |
Line 6537 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options
|
Line 6610 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options
|
ctxt->options |= HTML_PARSE_NODEFDTD; |
ctxt->options |= HTML_PARSE_NODEFDTD; |
options -= HTML_PARSE_NODEFDTD; |
options -= HTML_PARSE_NODEFDTD; |
} |
} |
|
if (options & HTML_PARSE_IGNORE_ENC) { |
|
ctxt->options |= HTML_PARSE_IGNORE_ENC; |
|
options -= HTML_PARSE_IGNORE_ENC; |
|
} |
|
if (options & HTML_PARSE_NOIMPLIED) { |
|
ctxt->options |= HTML_PARSE_NOIMPLIED; |
|
options -= HTML_PARSE_NOIMPLIED; |
|
} |
ctxt->dictNames = 0; |
ctxt->dictNames = 0; |
return (options); |
return (options); |
} |
} |
Line 6730 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseC
|
Line 6811 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseC
|
|
|
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
XML_CHAR_ENCODING_NONE); |
XML_CHAR_ENCODING_NONE); |
if (input == NULL) | if (input == NULL) { |
| if (ioclose != NULL) |
| ioclose(ioctx); |
return (NULL); |
return (NULL); |
|
} |
ctxt = htmlNewParserCtxt(); |
ctxt = htmlNewParserCtxt(); |
if (ctxt == NULL) { |
if (ctxt == NULL) { |
xmlFreeParserInputBuffer(input); |
xmlFreeParserInputBuffer(input); |
Line 6930 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCal
|
Line 7014 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCal
|
|
|
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
XML_CHAR_ENCODING_NONE); |
XML_CHAR_ENCODING_NONE); |
if (input == NULL) | if (input == NULL) { |
| if (ioclose != NULL) |
| ioclose(ioctx); |
return (NULL); |
return (NULL); |
|
} |
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
if (stream == NULL) { |
if (stream == NULL) { |
xmlFreeParserInputBuffer(input); |
xmlFreeParserInputBuffer(input); |