version 1.1.1.1, 2012/02/21 23:48:05
|
version 1.1.1.3, 2013/07/22 01:32:10
|
Line 2
|
Line 2
|
+----------------------------------------------------------------------+ |
+----------------------------------------------------------------------+ |
| PHP Version 5 | |
| PHP Version 5 | |
+----------------------------------------------------------------------+ |
+----------------------------------------------------------------------+ |
| Copyright (c) 1997-2012 The PHP Group | | | Copyright (c) 1997-2013 The PHP Group | |
+----------------------------------------------------------------------+ |
+----------------------------------------------------------------------+ |
| This source file is subject to version 3.01 of the PHP license, | |
| This source file is subject to version 3.01 of the PHP license, | |
| that is bundled with this package in the file LICENSE, and is | |
| that is bundled with this package in the file LICENSE, and is | |
Line 32
|
Line 32
|
#include "zend_variables.h" |
#include "zend_variables.h" |
#include "ext/standard/php_string.h" |
#include "ext/standard/php_string.h" |
#include "ext/standard/info.h" |
#include "ext/standard/info.h" |
|
#include "ext/standard/html.h" |
|
|
#if HAVE_XML |
#if HAVE_XML |
|
|
Line 658 PHPAPI char *xml_utf8_encode(const char *s, int len, i
|
Line 659 PHPAPI char *xml_utf8_encode(const char *s, int len, i
|
return newbuf; |
return newbuf; |
} |
} |
/* }}} */ |
/* }}} */ |
|
|
/* copied from trunk's implementation of get_next_char in ext/standard/html.c */

/* Reports a bad sequence from inside the decoder: stores pos + advance in
 * *cursor, sets *status to FAILURE and returns 0 from the enclosing function.
 * Expects `cursor` and `status` to be in scope where it is expanded. */
#define MB_FAILURE(pos, advance) do { \
	*cursor = (pos) + (advance); \
	*status = FAILURE; \
	return 0; \
} while (0)

/* Non-zero when at least chars_need bytes are available starting at offset
 * pos. Expects `str_len` to be in scope. The pos <= str_len guard keeps the
 * unsigned subtraction from wrapping around when pos is already past the end
 * of the buffer. */
#define CHECK_LEN(pos, chars_need) ((pos) <= str_len && (str_len - (pos)) >= (chars_need))

/* Non-zero for a byte below 0x80 or in the range 0xC2..0xF4. */
#define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4))

/* Non-zero for a byte in the range 0x80..0xBF. */
#define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF)
|
|
|
/* {{{ php_next_utf8_char |
|
*/ |
|
static inline unsigned int php_next_utf8_char( |
|
const unsigned char *str, |
|
size_t str_len, |
|
size_t *cursor, |
|
int *status) |
|
{ |
|
size_t pos = *cursor; |
|
unsigned int this_char = 0; |
|
unsigned char c; |
|
|
|
*status = SUCCESS; |
|
|
|
if (!CHECK_LEN(pos, 1)) |
|
MB_FAILURE(pos, 1); |
|
|
|
/* We'll follow strategy 2. from section 3.6.1 of UTR #36: |
|
* "In a reported illegal byte sequence, do not include any |
|
* non-initial byte that encodes a valid character or is a leading |
|
* byte for a valid sequence.» */ |
|
c = str[pos]; |
|
if (c < 0x80) { |
|
this_char = c; |
|
pos++; |
|
} else if (c < 0xc2) { |
|
MB_FAILURE(pos, 1); |
|
} else if (c < 0xe0) { |
|
if (!CHECK_LEN(pos, 2)) |
|
MB_FAILURE(pos, 1); |
|
|
|
if (!utf8_trail(str[pos + 1])) { |
|
MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2); |
|
} |
|
this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); |
|
if (this_char < 0x80) { /* non-shortest form */ |
|
MB_FAILURE(pos, 2); |
|
} |
|
pos += 2; |
|
} else if (c < 0xf0) { |
|
size_t avail = str_len - pos; |
|
|
|
if (avail < 3 || |
|
!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) { |
|
if (avail < 2 || utf8_lead(str[pos + 1])) |
|
MB_FAILURE(pos, 1); |
|
else if (avail < 3 || utf8_lead(str[pos + 2])) |
|
MB_FAILURE(pos, 2); |
|
else |
|
MB_FAILURE(pos, 3); |
|
} |
|
|
|
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); |
|
if (this_char < 0x800) { /* non-shortest form */ |
|
MB_FAILURE(pos, 3); |
|
} else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */ |
|
MB_FAILURE(pos, 3); |
|
} |
|
pos += 3; |
|
} else if (c < 0xf5) { |
|
size_t avail = str_len - pos; |
|
|
|
if (avail < 4 || |
|
!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || |
|
!utf8_trail(str[pos + 3])) { |
|
if (avail < 2 || utf8_lead(str[pos + 1])) |
|
MB_FAILURE(pos, 1); |
|
else if (avail < 3 || utf8_lead(str[pos + 2])) |
|
MB_FAILURE(pos, 2); |
|
else if (avail < 4 || utf8_lead(str[pos + 3])) |
|
MB_FAILURE(pos, 3); |
|
else |
|
MB_FAILURE(pos, 4); |
|
} |
|
|
|
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); |
|
if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ |
|
MB_FAILURE(pos, 4); |
|
} |
|
pos += 4; |
|
} else { |
|
MB_FAILURE(pos, 1); |
|
} |
|
|
|
*cursor = pos; |
|
return this_char; |
|
} |
|
/* }}} */ |
|
|
|
|
|
/* {{{ xml_utf8_decode */ |
/* {{{ xml_utf8_decode */ |
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |