version 1.1.1.1, 2012/02/21 23:48:05
|
version 1.1.1.4, 2013/10/14 08:02:42
|
Line 2
|
Line 2
|
+----------------------------------------------------------------------+ |
+----------------------------------------------------------------------+ |
| PHP Version 5 | |
| PHP Version 5 | |
+----------------------------------------------------------------------+ |
+----------------------------------------------------------------------+ |
| Copyright (c) 1997-2012 The PHP Group | | | Copyright (c) 1997-2013 The PHP Group | |
+----------------------------------------------------------------------+ |
+----------------------------------------------------------------------+ |
| This source file is subject to version 3.01 of the PHP license, | |
| This source file is subject to version 3.01 of the PHP license, | |
| that is bundled with this package in the file LICENSE, and is | |
| that is bundled with this package in the file LICENSE, and is | |
Line 32
|
Line 32
|
#include "zend_variables.h" |
#include "zend_variables.h" |
#include "ext/standard/php_string.h" |
#include "ext/standard/php_string.h" |
#include "ext/standard/info.h" |
#include "ext/standard/info.h" |
|
#include "ext/standard/html.h" |
|
|
#if HAVE_XML |
#if HAVE_XML |
|
|
Line 273 zend_module_entry xml_module_entry = {
|
Line 274 zend_module_entry xml_module_entry = {
|
* the encoding is currently done internally by expat/xmltok. |
* the encoding is currently done internally by expat/xmltok. |
*/ |
*/ |
xml_encoding xml_encodings[] = { |
xml_encoding xml_encodings[] = { |
{ "ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 }, | { (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 }, |
{ "US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii }, | { (XML_Char *)"US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii }, |
{ "UTF-8", NULL, NULL }, | { (XML_Char *)"UTF-8", NULL, NULL }, |
{ NULL, NULL, NULL } | { (XML_Char *)NULL, NULL, NULL } |
}; |
}; |
|
|
static XML_Memory_Handling_Suite php_xml_mem_hdlrs; |
static XML_Memory_Handling_Suite php_xml_mem_hdlrs; |
Line 427 static void xml_parser_dtor(zend_rsrc_list_entry *rsrc
|
Line 428 static void xml_parser_dtor(zend_rsrc_list_entry *rsrc
|
} |
} |
if (parser->ltags) { |
if (parser->ltags) { |
int inx; |
int inx; |
for (inx = 0; inx < parser->level; inx++) | for (inx = 0; ((inx < parser->level) && (inx < XML_MAXLEVEL)); inx++) |
efree(parser->ltags[ inx ]); |
efree(parser->ltags[ inx ]); |
efree(parser->ltags); |
efree(parser->ltags); |
} |
} |
Line 659 PHPAPI char *xml_utf8_encode(const char *s, int len, i
|
Line 660 PHPAPI char *xml_utf8_encode(const char *s, int len, i
|
} |
} |
/* }}} */ |
/* }}} */ |
|
|
/* copied from trunk's implementation of get_next_char in ext/standard/html.c */ |
|
/*
 * Helper macros for php_next_utf8_char().
 *
 * MB_FAILURE and CHECK_LEN are not self-contained: they expand to code that
 * uses the enclosing function's `cursor`, `status` and `str_len` variables.
 */

/* Report a decoding failure: store pos + advance through `cursor`, store
 * FAILURE through `status`, and return 0 from the enclosing function. */
#define MB_FAILURE(pos, advance) do { \
	*cursor = (pos) + (advance); \
	*status = FAILURE; \
	return 0; \
} while (0)

/* True when at least chars_need bytes are available at offset pos of a
 * buffer holding str_len bytes. The (pos) <= str_len test is required
 * because the subtraction is unsigned: without it an offset past the end
 * wraps around to a huge value and the check would wrongly succeed.
 * Note: pos is evaluated twice, so it must not have side effects. */
#define CHECK_LEN(pos, chars_need) \
	((pos) <= str_len && (str_len - (pos)) >= (chars_need))

/* True for a byte that may begin a UTF-8 sequence: below 0x80, or in
 * 0xC2..0xF4. c is evaluated more than once and is expected to be an
 * unsigned byte value. */
#define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4))

/* True for a UTF-8 continuation byte (0x80..0xBF). c is evaluated twice. */
#define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF)
|
|
|
/* {{{ php_next_utf8_char |
|
*/ |
|
static inline unsigned int php_next_utf8_char( |
|
const unsigned char *str, |
|
size_t str_len, |
|
size_t *cursor, |
|
int *status) |
|
{ |
|
size_t pos = *cursor; |
|
unsigned int this_char = 0; |
|
unsigned char c; |
|
|
|
*status = SUCCESS; |
|
|
|
if (!CHECK_LEN(pos, 1)) |
|
MB_FAILURE(pos, 1); |
|
|
|
/* We'll follow strategy 2. from section 3.6.1 of UTR #36: |
|
* "In a reported illegal byte sequence, do not include any |
|
* non-initial byte that encodes a valid character or is a leading |
|
* byte for a valid sequence.» */ |
|
c = str[pos]; |
|
if (c < 0x80) { |
|
this_char = c; |
|
pos++; |
|
} else if (c < 0xc2) { |
|
MB_FAILURE(pos, 1); |
|
} else if (c < 0xe0) { |
|
if (!CHECK_LEN(pos, 2)) |
|
MB_FAILURE(pos, 1); |
|
|
|
if (!utf8_trail(str[pos + 1])) { |
|
MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2); |
|
} |
|
this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); |
|
if (this_char < 0x80) { /* non-shortest form */ |
|
MB_FAILURE(pos, 2); |
|
} |
|
pos += 2; |
|
} else if (c < 0xf0) { |
|
size_t avail = str_len - pos; |
|
|
|
if (avail < 3 || |
|
!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) { |
|
if (avail < 2 || utf8_lead(str[pos + 1])) |
|
MB_FAILURE(pos, 1); |
|
else if (avail < 3 || utf8_lead(str[pos + 2])) |
|
MB_FAILURE(pos, 2); |
|
else |
|
MB_FAILURE(pos, 3); |
|
} |
|
|
|
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); |
|
if (this_char < 0x800) { /* non-shortest form */ |
|
MB_FAILURE(pos, 3); |
|
} else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */ |
|
MB_FAILURE(pos, 3); |
|
} |
|
pos += 3; |
|
} else if (c < 0xf5) { |
|
size_t avail = str_len - pos; |
|
|
|
if (avail < 4 || |
|
!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || |
|
!utf8_trail(str[pos + 3])) { |
|
if (avail < 2 || utf8_lead(str[pos + 1])) |
|
MB_FAILURE(pos, 1); |
|
else if (avail < 3 || utf8_lead(str[pos + 2])) |
|
MB_FAILURE(pos, 2); |
|
else if (avail < 4 || utf8_lead(str[pos + 3])) |
|
MB_FAILURE(pos, 3); |
|
else |
|
MB_FAILURE(pos, 4); |
|
} |
|
|
|
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); |
|
if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ |
|
MB_FAILURE(pos, 4); |
|
} |
|
pos += 4; |
|
} else { |
|
MB_FAILURE(pos, 1); |
|
} |
|
|
|
*cursor = pos; |
|
return this_char; |
|
} |
|
/* }}} */ |
|
|
|
|
|
/* {{{ xml_utf8_decode */ |
/* {{{ xml_utf8_decode */ |
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |
{ |
{ |
Line 905 void _xml_startElementHandler(void *userData, const XM
|
Line 805 void _xml_startElementHandler(void *userData, const XM
|
} |
} |
|
|
if (parser->data) { |
if (parser->data) { |
zval *tag, *atr; | if (parser->level <= XML_MAXLEVEL) { |
int atcnt = 0; | zval *tag, *atr; |
| int atcnt = 0; |
|
|
MAKE_STD_ZVAL(tag); | MAKE_STD_ZVAL(tag); |
MAKE_STD_ZVAL(atr); | MAKE_STD_ZVAL(atr); |
|
|
array_init(tag); | array_init(tag); |
array_init(atr); | array_init(atr); |
|
|
_xml_add_to_info(parser,((char *) tag_name) + parser->toffset); | _xml_add_to_info(parser,((char *) tag_name) + parser->toffset); |
|
|
add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */ | add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */ |
add_assoc_string(tag,"type","open",1); | add_assoc_string(tag,"type","open",1); |
add_assoc_long(tag,"level",parser->level); | add_assoc_long(tag,"level",parser->level); |
|
|
parser->ltags[parser->level-1] = estrdup(tag_name); | parser->ltags[parser->level-1] = estrdup(tag_name); |
parser->lastwasopen = 1; | parser->lastwasopen = 1; |
|
|
attributes = (const XML_Char **) attrs; | attributes = (const XML_Char **) attrs; |
|
|
while (attributes && *attributes) { | while (attributes && *attributes) { |
att = _xml_decode_tag(parser, attributes[0]); | att = _xml_decode_tag(parser, attributes[0]); |
val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding); | val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding); |
| |
add_assoc_stringl(atr,att,val,val_len,0); | |
|
|
atcnt++; | add_assoc_stringl(atr,att,val,val_len,0); |
attributes += 2; | |
|
|
efree(att); | atcnt++; |
} | attributes += 2; |
|
|
if (atcnt) { | efree(att); |
zend_hash_add(Z_ARRVAL_P(tag),"attributes",sizeof("attributes"),&atr,sizeof(zval*),NULL); | } |
} else { | |
zval_ptr_dtor(&atr); | |
} | |
|
|
zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),(void *) &parser->ctag); | if (atcnt) { |
| zend_hash_add(Z_ARRVAL_P(tag),"attributes",sizeof("attributes"),&atr,sizeof(zval*),NULL); |
| } else { |
| zval_ptr_dtor(&atr); |
| } |
| |
| zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),(void *) &parser->ctag); |
| } else if (parser->level == (XML_MAXLEVEL + 1)) { |
| TSRMLS_FETCH(); |
| php_error_docref(NULL TSRMLS_CC, E_WARNING, "Maximum depth exceeded - Results truncated"); |
| } |
} |
} |
|
|
efree(tag_name); |
efree(tag_name); |
Line 995 void _xml_endElementHandler(void *userData, const XML_
|
Line 900 void _xml_endElementHandler(void *userData, const XML_
|
|
|
efree(tag_name); |
efree(tag_name); |
|
|
if (parser->ltags) { | if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) { |
efree(parser->ltags[parser->level-1]); |
efree(parser->ltags[parser->level-1]); |
} |
} |
|
|
Line 1079 void _xml_characterDataHandler(void *userData, const X
|
Line 984 void _xml_characterDataHandler(void *userData, const X
|
} |
} |
} |
} |
|
|
MAKE_STD_ZVAL(tag); | if (parser->level <= XML_MAXLEVEL) { |
| MAKE_STD_ZVAL(tag); |
array_init(tag); | |
| |
_xml_add_to_info(parser,parser->ltags[parser->level-1] + parser->toffset); | |
|
|
add_assoc_string(tag,"tag",parser->ltags[parser->level-1] + parser->toffset,1); | array_init(tag); |
add_assoc_string(tag,"value",decoded_value,0); | |
add_assoc_string(tag,"type","cdata",1); | |
add_assoc_long(tag,"level",parser->level); | |
|
|
zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),NULL); | _xml_add_to_info(parser,parser->ltags[parser->level-1] + parser->toffset); |
| |
| add_assoc_string(tag,"tag",parser->ltags[parser->level-1] + parser->toffset,1); |
| add_assoc_string(tag,"value",decoded_value,0); |
| add_assoc_string(tag,"type","cdata",1); |
| add_assoc_long(tag,"level",parser->level); |
| |
| zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),NULL); |
| } else if (parser->level == (XML_MAXLEVEL + 1)) { |
| TSRMLS_FETCH(); |
| php_error_docref(NULL TSRMLS_CC, E_WARNING, "Maximum depth exceeded - Results truncated"); |
| } |
} |
} |
} else { |
} else { |
efree(decoded_value); |
efree(decoded_value); |