|
|
| version 1.1.1.1, 2012/02/21 23:48:05 | version 1.1.1.4, 2013/10/14 08:02:42 |
|---|---|
| Line 2 | Line 2 |
| +----------------------------------------------------------------------+ | +----------------------------------------------------------------------+ |
| | PHP Version 5 | | | PHP Version 5 | |
| +----------------------------------------------------------------------+ | +----------------------------------------------------------------------+ |
| | Copyright (c) 1997-2012 The PHP Group | | | Copyright (c) 1997-2013 The PHP Group | |
| +----------------------------------------------------------------------+ | +----------------------------------------------------------------------+ |
| | This source file is subject to version 3.01 of the PHP license, | | | This source file is subject to version 3.01 of the PHP license, | |
| | that is bundled with this package in the file LICENSE, and is | | | that is bundled with this package in the file LICENSE, and is | |
| Line 32 | Line 32 |
| #include "zend_variables.h" | #include "zend_variables.h" |
| #include "ext/standard/php_string.h" | #include "ext/standard/php_string.h" |
| #include "ext/standard/info.h" | #include "ext/standard/info.h" |
| #include "ext/standard/html.h" | |
| #if HAVE_XML | #if HAVE_XML |
| Line 273 zend_module_entry xml_module_entry = { | Line 274 zend_module_entry xml_module_entry = { |
| * the encoding is currently done internally by expat/xmltok. | * the encoding is currently done internally by expat/xmltok. |
| */ | */ |
| xml_encoding xml_encodings[] = { | xml_encoding xml_encodings[] = { |
| { "ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 }, | { (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 }, |
| { "US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii }, | { (XML_Char *)"US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii }, |
| { "UTF-8", NULL, NULL }, | { (XML_Char *)"UTF-8", NULL, NULL }, |
| { NULL, NULL, NULL } | { (XML_Char *)NULL, NULL, NULL } |
| }; | }; |
| static XML_Memory_Handling_Suite php_xml_mem_hdlrs; | static XML_Memory_Handling_Suite php_xml_mem_hdlrs; |
| Line 427 static void xml_parser_dtor(zend_rsrc_list_entry *rsrc | Line 428 static void xml_parser_dtor(zend_rsrc_list_entry *rsrc |
| } | } |
| if (parser->ltags) { | if (parser->ltags) { |
| int inx; | int inx; |
| for (inx = 0; inx < parser->level; inx++) | for (inx = 0; ((inx < parser->level) && (inx < XML_MAXLEVEL)); inx++) |
| efree(parser->ltags[ inx ]); | efree(parser->ltags[ inx ]); |
| efree(parser->ltags); | efree(parser->ltags); |
| } | } |
| Line 659 PHPAPI char *xml_utf8_encode(const char *s, int len, i | Line 660 PHPAPI char *xml_utf8_encode(const char *s, int len, i |
| } | } |
| /* }}} */ | /* }}} */ |
| /* copied from trunk's implementation of get_next_char in ext/standard/html.c | |
| * | |
| * These helper macros expand inside a decoder function and use its | |
| * variables named cursor, status and str_len: | |
| * MB_FAILURE(pos, advance): store pos + advance in *cursor, set *status | |
| * to FAILURE and return 0 from the enclosing function. | |
| * CHECK_LEN(pos, chars_need): true when at least chars_need bytes remain | |
| * between offset pos and str_len. | |
| * utf8_lead(c): true for a byte below 0x80 or in the range 0xC2..0xF4. | |
| * utf8_trail(c): true for a byte in the range 0x80..0xBF. | |
| */ | |
| #define MB_FAILURE(pos, advance) do { \ | |
| *cursor = pos + (advance); \ | |
| *status = FAILURE; \ | |
| return 0; \ | |
| } while (0) | |
| #define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need)) | |
| #define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4)) | |
| #define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF) | |
| /* {{{ php_next_utf8_char | |
| * Decodes the UTF-8 sequence that starts at str[*cursor] and returns its | |
| * code point. | |
| * | |
| * str: input bytes; str_len: number of bytes in str. | |
| * cursor: in/out byte offset; on success it is moved past the decoded | |
| * sequence, on failure it is moved past the bytes reported as invalid | |
| * (1 to 4, see the MB_FAILURE calls). | |
| * status: set to SUCCESS, or to FAILURE for a truncated, malformed, | |
| * non-shortest-form, surrogate or out-of-range sequence. | |
| * | |
| * Returns the decoded code point, or 0 when status is set to FAILURE. | |
| * A NUL byte also decodes to 0 with SUCCESS, so callers have to check | |
| * status rather than the return value. | |
| */ | |
| static inline unsigned int php_next_utf8_char( | |
| const unsigned char *str, | |
| size_t str_len, | |
| size_t *cursor, | |
| int *status) | |
| { | |
| size_t pos = *cursor; | |
| unsigned int this_char = 0; | |
| unsigned char c; | |
| *status = SUCCESS; | |
| if (!CHECK_LEN(pos, 1)) /* no byte left to read at pos */ | |
| MB_FAILURE(pos, 1); | |
| /* We'll follow strategy 2. from section 3.6.1 of UTR #36: | |
| * "In a reported illegal byte sequence, do not include any | |
| * non-initial byte that encodes a valid character or is a leading | |
| * byte for a valid sequence." */ | |
| c = str[pos]; | |
| if (c < 0x80) { /* single byte, used as the code point directly */ | |
| this_char = c; | |
| pos++; | |
| } else if (c < 0xc2) { /* 0x80..0xC1 is rejected as a first byte */ | |
| MB_FAILURE(pos, 1); | |
| } else if (c < 0xe0) { /* 0xC2..0xDF: two-byte sequence */ | |
| if (!CHECK_LEN(pos, 2)) | |
| MB_FAILURE(pos, 1); | |
| if (!utf8_trail(str[pos + 1])) { | |
| MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2); | |
| } | |
| this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); | |
| if (this_char < 0x80) { /* non-shortest form */ | |
| MB_FAILURE(pos, 2); | |
| } | |
| pos += 2; | |
| } else if (c < 0xf0) { /* 0xE0..0xEF: three-byte sequence */ | |
| size_t avail = str_len - pos; | |
| if (avail < 3 || | |
| !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) { | |
| /* skip only up to the first missing byte or byte that could start a new sequence */ | |
| if (avail < 2 || utf8_lead(str[pos + 1])) | |
| MB_FAILURE(pos, 1); | |
| else if (avail < 3 || utf8_lead(str[pos + 2])) | |
| MB_FAILURE(pos, 2); | |
| else | |
| MB_FAILURE(pos, 3); | |
| } | |
| this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); | |
| if (this_char < 0x800) { /* non-shortest form */ | |
| MB_FAILURE(pos, 3); | |
| } else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */ | |
| MB_FAILURE(pos, 3); | |
| } | |
| pos += 3; | |
| } else if (c < 0xf5) { /* 0xF0..0xF4: four-byte sequence */ | |
| size_t avail = str_len - pos; | |
| if (avail < 4 || | |
| !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || | |
| !utf8_trail(str[pos + 3])) { | |
| /* skip only up to the first missing byte or byte that could start a new sequence */ | |
| if (avail < 2 || utf8_lead(str[pos + 1])) | |
| MB_FAILURE(pos, 1); | |
| else if (avail < 3 || utf8_lead(str[pos + 2])) | |
| MB_FAILURE(pos, 2); | |
| else if (avail < 4 || utf8_lead(str[pos + 3])) | |
| MB_FAILURE(pos, 3); | |
| else | |
| MB_FAILURE(pos, 4); | |
| } | |
| this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); | |
| if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ | |
| MB_FAILURE(pos, 4); | |
| } | |
| pos += 4; | |
| } else { /* 0xF5..0xFF is rejected as a first byte */ | |
| MB_FAILURE(pos, 1); | |
| } | |
| *cursor = pos; /* success: move the caller's offset past the decoded sequence */ | |
| return this_char; | |
| } | |
| /* }}} */ | |
| /* {{{ xml_utf8_decode */ | /* {{{ xml_utf8_decode */ |
| PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) | PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |
| { | { |
| Line 905 void _xml_startElementHandler(void *userData, const XM | Line 805 void _xml_startElementHandler(void *userData, const XM |
| } | } |
| if (parser->data) { | if (parser->data) { |
| zval *tag, *atr; | if (parser->level <= XML_MAXLEVEL) { |
| int atcnt = 0; | zval *tag, *atr; |
| int atcnt = 0; | |
| MAKE_STD_ZVAL(tag); | MAKE_STD_ZVAL(tag); |
| MAKE_STD_ZVAL(atr); | MAKE_STD_ZVAL(atr); |
| array_init(tag); | array_init(tag); |
| array_init(atr); | array_init(atr); |
| _xml_add_to_info(parser,((char *) tag_name) + parser->toffset); | _xml_add_to_info(parser,((char *) tag_name) + parser->toffset); |
| add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */ | add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */ |
| add_assoc_string(tag,"type","open",1); | add_assoc_string(tag,"type","open",1); |
| add_assoc_long(tag,"level",parser->level); | add_assoc_long(tag,"level",parser->level); |
| parser->ltags[parser->level-1] = estrdup(tag_name); | parser->ltags[parser->level-1] = estrdup(tag_name); |
| parser->lastwasopen = 1; | parser->lastwasopen = 1; |
| attributes = (const XML_Char **) attrs; | attributes = (const XML_Char **) attrs; |
| while (attributes && *attributes) { | while (attributes && *attributes) { |
| att = _xml_decode_tag(parser, attributes[0]); | att = _xml_decode_tag(parser, attributes[0]); |
| val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding); | val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding); |
| add_assoc_stringl(atr,att,val,val_len,0); | |
| atcnt++; | add_assoc_stringl(atr,att,val,val_len,0); |
| attributes += 2; | |
| efree(att); | atcnt++; |
| } | attributes += 2; |
| if (atcnt) { | efree(att); |
| zend_hash_add(Z_ARRVAL_P(tag),"attributes",sizeof("attributes"),&atr,sizeof(zval*),NULL); | } |
| } else { | |
| zval_ptr_dtor(&atr); | |
| } | |
| zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),(void *) &parser->ctag); | if (atcnt) { |
| zend_hash_add(Z_ARRVAL_P(tag),"attributes",sizeof("attributes"),&atr,sizeof(zval*),NULL); | |
| } else { | |
| zval_ptr_dtor(&atr); | |
| } | |
| zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),(void *) &parser->ctag); | |
| } else if (parser->level == (XML_MAXLEVEL + 1)) { | |
| TSRMLS_FETCH(); | |
| php_error_docref(NULL TSRMLS_CC, E_WARNING, "Maximum depth exceeded - Results truncated"); | |
| } | |
| } | } |
| efree(tag_name); | efree(tag_name); |
| Line 995 void _xml_endElementHandler(void *userData, const XML_ | Line 900 void _xml_endElementHandler(void *userData, const XML_ |
| efree(tag_name); | efree(tag_name); |
| if (parser->ltags) { | if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) { |
| efree(parser->ltags[parser->level-1]); | efree(parser->ltags[parser->level-1]); |
| } | } |
| Line 1079 void _xml_characterDataHandler(void *userData, const X | Line 984 void _xml_characterDataHandler(void *userData, const X |
| } | } |
| } | } |
| MAKE_STD_ZVAL(tag); | if (parser->level <= XML_MAXLEVEL) { |
| MAKE_STD_ZVAL(tag); | |
| array_init(tag); | |
| _xml_add_to_info(parser,parser->ltags[parser->level-1] + parser->toffset); | |
| add_assoc_string(tag,"tag",parser->ltags[parser->level-1] + parser->toffset,1); | array_init(tag); |
| add_assoc_string(tag,"value",decoded_value,0); | |
| add_assoc_string(tag,"type","cdata",1); | |
| add_assoc_long(tag,"level",parser->level); | |
| zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),NULL); | _xml_add_to_info(parser,parser->ltags[parser->level-1] + parser->toffset); |
| add_assoc_string(tag,"tag",parser->ltags[parser->level-1] + parser->toffset,1); | |
| add_assoc_string(tag,"value",decoded_value,0); | |
| add_assoc_string(tag,"type","cdata",1); | |
| add_assoc_long(tag,"level",parser->level); | |
| zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),NULL); | |
| } else if (parser->level == (XML_MAXLEVEL + 1)) { | |
| TSRMLS_FETCH(); | |
| php_error_docref(NULL TSRMLS_CC, E_WARNING, "Maximum depth exceeded - Results truncated"); | |
| } | |
| } | } |
| } else { | } else { |
| efree(decoded_value); | efree(decoded_value); |