|
|
| version 1.1.1.1, 2012/02/21 23:48:05 | version 1.1.1.3, 2013/07/22 01:32:10 |
|---|---|
| Line 2 | Line 2 |
| +----------------------------------------------------------------------+ | +----------------------------------------------------------------------+ |
| | PHP Version 5 | | | PHP Version 5 | |
| +----------------------------------------------------------------------+ | +----------------------------------------------------------------------+ |
| | Copyright (c) 1997-2012 The PHP Group | | | Copyright (c) 1997-2013 The PHP Group | |
| +----------------------------------------------------------------------+ | +----------------------------------------------------------------------+ |
| | This source file is subject to version 3.01 of the PHP license, | | | This source file is subject to version 3.01 of the PHP license, | |
| | that is bundled with this package in the file LICENSE, and is | | | that is bundled with this package in the file LICENSE, and is | |
| Line 32 | Line 32 |
| #include "zend_variables.h" | #include "zend_variables.h" |
| #include "ext/standard/php_string.h" | #include "ext/standard/php_string.h" |
| #include "ext/standard/info.h" | #include "ext/standard/info.h" |
| #include "ext/standard/html.h" | |
| #if HAVE_XML | #if HAVE_XML |
| Line 658 PHPAPI char *xml_utf8_encode(const char *s, int len, i | Line 659 PHPAPI char *xml_utf8_encode(const char *s, int len, i |
| return newbuf; | return newbuf; |
| } | } |
| /* }}} */ | /* }}} */ |
| /* copied from trunk's implementation of get_next_char in ext/standard/html.c | |
| * | |
| * Helper macros for the UTF-8 decoder that follows. MB_FAILURE and CHECK_LEN | |
| * are not self-contained: they expand to code that uses the names cursor, | |
| * status and str_len from the scope in which they are used. | |
| * | |
| * MB_FAILURE(pos, advance): stores pos + advance through cursor, stores | |
| * FAILURE through status and returns 0 from the enclosing function. | |
| * NOTE(review): pos is not parenthesised in the expansion and utf8_lead | |
| * evaluates its argument more than once, so pass only simple, side-effect | |
| * free expressions. | |
| */ | |
| #define MB_FAILURE(pos, advance) do { \ | |
| *cursor = pos + (advance); \ | |
| *status = FAILURE; \ | |
| return 0; \ | |
| } while (0) | |
| #define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need)) /* true when at least chars_need bytes remain at pos; plain subtraction, so it assumes pos <= str_len */ | |
| #define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4)) /* byte that may begin a sequence: ASCII or a lead byte 0xC2..0xF4 */ | |
| #define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF) /* continuation byte 0x80..0xBF */ | |
| /* {{{ php_next_utf8_char | |
| * Decodes the single UTF-8 sequence that starts at str[*cursor] and returns | |
| * its code point. | |
| * | |
| * str      bytes to decode | |
| * str_len  number of bytes in str; used as the upper bound for every read | |
| * cursor   in: index of the first byte to decode | |
| *          out: index just past the decoded sequence on success, or the | |
| *          index at which the caller should resume after a failure | |
| * status   out: SUCCESS or FAILURE | |
| * | |
| * Returns the decoded code point on success. On failure returns 0 with | |
| * *status set to FAILURE; since 0 is also the result of decoding a NUL byte, | |
| * callers have to look at *status rather than at the return value. | |
| * | |
| * Rejected input: truncated sequences, bytes 0x80..0xC1 and 0xF5..0xFF in lead | |
| * position, missing trail bytes, non-shortest forms, surrogates | |
| * (0xD800..0xDFFF) and values above 0x10FFFF. | |
| */ | |
| static inline unsigned int php_next_utf8_char( | |
| const unsigned char *str, | |
| size_t str_len, | |
| size_t *cursor, | |
| int *status) | |
| { | |
| size_t pos = *cursor; | |
| unsigned int this_char = 0; | |
| unsigned char c; | |
| *status = SUCCESS; | |
| if (!CHECK_LEN(pos, 1)) /* nothing left to read. NOTE(review): str_len - pos is unsigned, so a cursor beyond str_len would wrap and pass this check; presumably callers never pass one - confirm */ | |
| MB_FAILURE(pos, 1); | |
| /* We'll follow strategy 2. from section 3.6.1 of UTR #36: | |
| * "In a reported illegal byte sequence, do not include any | |
| * non-initial byte that encodes a valid character or is a leading | |
| * byte for a valid sequence." */ | |
| c = str[pos]; | |
| if (c < 0x80) { /* single-byte (ASCII) character */ | |
| this_char = c; | |
| pos++; | |
| } else if (c < 0xc2) { /* 0x80..0xC1 is not accepted as a lead byte (see utf8_lead) */ | |
| MB_FAILURE(pos, 1); | |
| } else if (c < 0xe0) { /* two-byte sequence, lead 0xC2..0xDF */ | |
| if (!CHECK_LEN(pos, 2)) | |
| MB_FAILURE(pos, 1); | |
| if (!utf8_trail(str[pos + 1])) { | |
| MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2); /* leave the next byte for the caller if it could start a character, otherwise skip it too */ | |
| } | |
| this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); | |
| if (this_char < 0x80) { /* non-shortest form; defensive only, c >= 0xc2 already makes this_char >= 0x80 */ | |
| MB_FAILURE(pos, 2); | |
| } | |
| pos += 2; | |
| } else if (c < 0xf0) { /* three-byte sequence, lead 0xE0..0xEF */ | |
| size_t avail = str_len - pos; /* bytes remaining, counting the lead byte */ | |
| if (avail < 3 || | |
| !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) { | |
| if (avail < 2 || utf8_lead(str[pos + 1])) /* each avail test short-circuits before the byte is read, so no read goes past str_len */ | |
| MB_FAILURE(pos, 1); | |
| else if (avail < 3 || utf8_lead(str[pos + 2])) | |
| MB_FAILURE(pos, 2); | |
| else | |
| MB_FAILURE(pos, 3); | |
| } | |
| this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); | |
| if (this_char < 0x800) { /* non-shortest form */ | |
| MB_FAILURE(pos, 3); | |
| } else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */ | |
| MB_FAILURE(pos, 3); | |
| } | |
| pos += 3; | |
| } else if (c < 0xf5) { /* four-byte sequence, lead 0xF0..0xF4 */ | |
| size_t avail = str_len - pos; /* bytes remaining, counting the lead byte */ | |
| if (avail < 4 || | |
| !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || | |
| !utf8_trail(str[pos + 3])) { | |
| if (avail < 2 || utf8_lead(str[pos + 1])) /* same resynchronisation rule as the three-byte case: stop before the first byte that could start a character */ | |
| MB_FAILURE(pos, 1); | |
| else if (avail < 3 || utf8_lead(str[pos + 2])) | |
| MB_FAILURE(pos, 2); | |
| else if (avail < 4 || utf8_lead(str[pos + 3])) | |
| MB_FAILURE(pos, 3); | |
| else | |
| MB_FAILURE(pos, 4); | |
| } | |
| this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); | |
| if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ | |
| MB_FAILURE(pos, 4); | |
| } | |
| pos += 4; | |
| } else { /* 0xF5..0xFF is not accepted as a lead byte */ | |
| MB_FAILURE(pos, 1); | |
| } | |
| *cursor = pos; /* success: cursor now sits just past the decoded sequence */ | |
| return this_char; | |
| } | |
| /* }}} */ | |
| /* {{{ xml_utf8_decode */ | /* {{{ xml_utf8_decode */ |
| PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) | PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |