--- embedaddon/php/ext/mbstring/mbstring.c 2012/02/21 23:47:57 1.1.1.1 +++ embedaddon/php/ext/mbstring/mbstring.c 2014/06/15 20:03:49 1.1.1.4 @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2012 The PHP Group | + | Copyright (c) 1997-2014 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: mbstring.c,v 1.1.1.1 2012/02/21 23:47:57 misho Exp $ */ +/* $Id: mbstring.c,v 1.1.1.4 2014/06/15 20:03:49 misho Exp $ */ /* * PHP 4 Multibyte String module "mbstring" @@ -62,6 +62,7 @@ #include "ext/standard/info.h" #include "libmbfl/mbfl/mbfl_allocators.h" +#include "libmbfl/mbfl/mbfilter_pass.h" #include "php_variables.h" #include "php_globals.h" @@ -77,9 +78,7 @@ #include "php_mbregex.h" #endif -#ifdef ZEND_MULTIBYTE #include "zend_multibyte.h" -#endif /* ZEND_MULTIBYTE */ #if HAVE_ONIG #include "php_onig_compat.h" @@ -98,19 +97,21 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring) static PHP_GINIT_FUNCTION(mbstring); static PHP_GSHUTDOWN_FUNCTION(mbstring); -#ifdef ZEND_MULTIBYTE -static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC); -static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC); -static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC); -static int php_mb_set_zend_encoding(TSRMLS_D); -#endif +static void php_mb_populate_current_detect_order_list(TSRMLS_D); + +static int php_mb_encoding_translation(TSRMLS_D); + +static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC); + +static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC); + /* }}} */ /* {{{ php_mb_default_identify_list */ typedef struct _php_mb_nls_ident_list { enum mbfl_no_language lang; - const enum mbfl_no_encoding* list; - int list_size; + const enum mbfl_no_encoding *list; + size_t list_size; } php_mb_nls_ident_list; static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = { @@ -411,6 +412,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity ZEND_ARG_INFO(0, string) ZEND_ARG_INFO(0, convmap) ZEND_ARG_INFO(0, encoding) + ZEND_ARG_INFO(0, is_hex) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2) @@ -465,6 +467,13 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, ZEND_ARG_INFO(0, string) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3) + ZEND_ARG_INFO(0, pattern) + ZEND_ARG_INFO(0, callback) + ZEND_ARG_INFO(0, string) + ZEND_ARG_INFO(0, option) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2) ZEND_ARG_INFO(0, pattern) ZEND_ARG_INFO(0, string) @@ -563,7 +572,7 @@ const zend_function_entry mbstring_functions[] = { /* {{{ zend_module_entry mbstring_module_entry */ zend_module_entry mbstring_module_entry = { - STANDARD_MODULE_HEADER, + STANDARD_MODULE_HEADER, "mbstring", mbstring_functions, PHP_MINIT(mbstring), @@ -571,11 +580,11 @@ zend_module_entry mbstring_module_entry = { PHP_RINIT(mbstring), PHP_RSHUTDOWN(mbstring), PHP_MINFO(mbstring), - NO_VERSION_YET, - PHP_MODULE_GLOBALS(mbstring), - PHP_GINIT(mbstring), - PHP_GSHUTDOWN(mbstring), - NULL, + NO_VERSION_YET, + PHP_MODULE_GLOBALS(mbstring), + PHP_GINIT(mbstring), + PHP_GSHUTDOWN(mbstring), + NULL, STANDARD_MODULE_PROPERTIES_EX }; /* }}} */ @@ -653,12 +662,12 @@ static sapi_post_entry mbstr_post_entries[] = { * of parsed encodings. */ static int -php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) { - int n, l, size, bauto, ret = 1; + int size, bauto, ret = SUCCESS; + size_t n; char *p, *p1, *p2, *endp, *tmpstr; - enum mbfl_no_encoding no_encoding; - enum mbfl_no_encoding *src, *entry, *list; + const mbfl_encoding **entry, **list; list = NULL; if (value == NULL || value_length <= 0) { @@ -668,14 +677,8 @@ php_mb_parse_encoding_list(const char *value, int valu if (return_size) { *return_size = 0; } - return 0; + return FAILURE; } else { - enum mbfl_no_encoding *identify_list; - int identify_list_size; - - identify_list = MBSTRG(default_detect_order_list); - identify_list_size = MBSTRG(default_detect_order_list_size); - /* copy the value string for work */ if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { tmpstr = (char *)estrndup(value+1, value_length-2); @@ -684,7 +687,7 @@ php_mb_parse_encoding_list(const char *value, int valu else tmpstr = (char *)estrndup(value, value_length); if (tmpstr == NULL) { - return 0; + return FAILURE; } /* count the number of listed encoding names */ endp = tmpstr + value_length; @@ -694,9 +697,9 @@ php_mb_parse_encoding_list(const char *value, int valu p1 = p2 + 1; n++; } - size = n + identify_list_size; + size = n + MBSTRG(default_detect_order_list_size); /* make list */ - list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); + list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); if (list != NULL) { entry = list; n = 0; @@ -720,19 +723,19 @@ php_mb_parse_encoding_list(const char *value, int valu /* convert to the encoding number and check encoding */ if (strcasecmp(p1, "auto") == 0) { if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t i; bauto = 1; - l = identify_list_size; - src = identify_list; - while (l > 0) { - *entry++ = *src++; - l--; + for (i = 0; i < identify_list_size; i++) { + *entry++ = mbfl_no2encoding(*src++); n++; } } } else { - no_encoding = mbfl_name2no_encoding(p1); - if (no_encoding != mbfl_no_encoding_invalid) { - *entry++ = no_encoding; + const mbfl_encoding *encoding = mbfl_name2encoding(p1); + if (encoding) { + *entry++ = encoding; n++; } else { ret = 0; @@ -772,39 +775,26 @@ php_mb_parse_encoding_list(const char *value, int valu } /* }}} */ -/* {{{ MBSTRING_API php_mb_check_encoding_list */ -MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) { - return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC); -} -/* }}} */ - /* {{{ static int php_mb_parse_encoding_array() * Return 0 if input contains any illegal encoding, otherwise 1. * Even if any illegal encoding is detected the result may contain a list * of parsed encodings. */ static int -php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) { zval **hash_entry; HashTable *target_hash; - int i, n, l, size, bauto,ret = 1; - enum mbfl_no_encoding no_encoding; - enum mbfl_no_encoding *src, *list, *entry; + int i, n, size, bauto, ret = SUCCESS; + const mbfl_encoding **list, **entry; list = NULL; if (Z_TYPE_P(array) == IS_ARRAY) { - enum mbfl_no_encoding *identify_list; - int identify_list_size; - - identify_list = MBSTRG(default_detect_order_list); - identify_list_size = MBSTRG(default_detect_order_list_size); - target_hash = Z_ARRVAL_P(array); zend_hash_internal_pointer_reset(target_hash); i = zend_hash_num_elements(target_hash); - size = i + identify_list_size; - list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); + size = i + MBSTRG(default_detect_order_list_size); + list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); if (list != NULL) { entry = list; bauto = 0; @@ -816,22 +806,23 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_ convert_to_string_ex(hash_entry); if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) { if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t j; + bauto = 1; - l = identify_list_size; - src = identify_list; - while (l > 0) { - *entry++ = *src++; - l--; + for (j = 0; j < identify_list_size; j++) { + *entry++ = mbfl_no2encoding(*src++); n++; } } } else { - no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry)); - if (no_encoding != mbfl_no_encoding_invalid) { - *entry++ = no_encoding; + const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry)); + if (encoding) { + *entry++ = encoding; n++; } else { - ret = 0; + ret = FAILURE; } } zend_hash_move_forward(target_hash); @@ -848,7 +839,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_ if (return_list) { *return_list = NULL; } - ret = 0; + ret = FAILURE; } if (return_size) { *return_size = n; @@ -860,7 +851,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_ if (return_size) { *return_size = 0; } - ret = 0; + ret = FAILURE; } } @@ -868,6 +859,118 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_ } /* }}} */ +/* {{{ zend_multibyte interface */ +static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC) +{ + return (const zend_encoding*)mbfl_name2encoding(encoding_name); +} + +static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding) +{ + return ((const mbfl_encoding *)encoding)->name; +} + +static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding) +{ + const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding; + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + return 1; + } + if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) { + return 1; + } + return 0; +} + +static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC) +{ + mbfl_string string; + + if (!list) { + list = (const zend_encoding **)MBSTRG(current_detect_order_list); + list_size = MBSTRG(current_detect_order_list_size); + } + + mbfl_string_init(&string); + string.no_language = MBSTRG(language); + string.val = (unsigned char *)arg_string; + string.len = arg_length; + return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0); +} + +static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) +{ + mbfl_string string, result; + mbfl_buffer_converter *convd; + int status, loc; + + /* new encoding */ + /* initialize string */ + mbfl_string_init(&string); + mbfl_string_init(&result); + string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding; + string.no_language = MBSTRG(language); + string.val = (unsigned char*)from; + string.len = from_length; + + /* initialize converter */ + convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len); + if (convd == NULL) { + return -1; + } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + + /* do it */ + status = mbfl_buffer_converter_feed2(convd, &string, &loc); + if (status) { + mbfl_buffer_converter_delete(convd); + return (size_t)-1; + } + + mbfl_buffer_converter_flush(convd); + if (!mbfl_buffer_converter_result(convd, &result)) { + mbfl_buffer_converter_delete(convd); + return (size_t)-1; + } + + *to = result.val; + *to_length = result.len; + + mbfl_buffer_converter_delete(convd); + + return loc; +} + +static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) +{ + return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC); +} + +static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D) +{ + return (const zend_encoding *)MBSTRG(internal_encoding); +} + +static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC) +{ + MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding; + return SUCCESS; +} + +static zend_multibyte_functions php_mb_zend_multibyte_functions = { + "mbstring", + php_mb_zend_encoding_fetcher, + php_mb_zend_encoding_name_getter, + php_mb_zend_encoding_lexer_compatibility_checker, + php_mb_zend_encoding_detector, + php_mb_zend_encoding_converter, + php_mb_zend_encoding_list_parser, + php_mb_zend_internal_encoding_getter, + php_mb_zend_internal_encoding_setter +}; +/* }}} */ + static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC); static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len); static void _php_mb_free_regex(void *opaque); @@ -942,7 +1045,7 @@ static void _php_mb_free_regex(void *opaque) #endif /* {{{ php_mb_nls_get_default_detect_order_list */ -static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size) +static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size) { size_t i; @@ -960,6 +1063,124 @@ static int php_mb_nls_get_default_detect_order_list(en } /* }}} */ +static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC) +{ + char *result = emalloc(len + 2); + char *resp = result; + int i; + + for (i = 0; i < len && start[i] != quote; ++i) { + if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) { + *resp++ = start[++i]; + } else { + size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding); + + while (j-- > 0 && i < len) { + *resp++ = start[i++]; + } + --i; + } + } + + *resp = '\0'; + return result; +} + +static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */ +{ + char *pos = *line, quote; + char *res; + + while (*pos && *pos != stop) { + if ((quote = *pos) == '"' || quote == '\'') { + ++pos; + while (*pos && *pos != quote) { + if (*pos == '\\' && pos[1] && pos[1] == quote) { + pos += 2; + } else { + ++pos; + } + } + if (*pos) { + ++pos; + } + } else { + pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding); + + } + } + if (*pos == '\0') { + res = estrdup(*line); + *line += strlen(*line); + return res; + } + + res = estrndup(*line, pos - *line); + + while (*pos == stop) { + pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding); + } + + *line = pos; + return res; +} +/* }}} */ + +static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */ +{ + while (*str && isspace(*(unsigned char *)str)) { + ++str; + } + + if (!*str) { + return estrdup(""); + } + + if (*str == '"' || *str == '\'') { + char quote = *str; + + str++; + return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC); + } else { + char *strend = str; + + while (*strend && !isspace(*(unsigned char *)strend)) { + ++strend; + } + return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC); + } +} +/* }}} */ + +static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */ +{ + char *s, *s2; + const size_t filename_len = strlen(filename); + + /* The \ check should technically be needed for win32 systems only where + * it is a valid path separator. However, IE in all it's wisdom always sends + * the full path of the file on the user's filesystem, which means that unless + * the user does basename() they get a bogus file name. Until IE's user base drops + * to nill or problem is fixed this code must remain enabled for all systems. */ + s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding); + s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding); + + if (s && s2) { + if (s > s2) { + return ++s; + } else { + return ++s2; + } + } else if (s) { + return ++s; + } else if (s2) { + return ++s2; + } else { + return filename; + } +} +/* }}} */ + /* {{{ php.ini directive handler */ /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */ static PHP_INI_MH(OnUpdate_mbstring_language) @@ -980,23 +1201,27 @@ static PHP_INI_MH(OnUpdate_mbstring_language) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */ static PHP_INI_MH(OnUpdate_mbstring_detect_order) { - enum mbfl_no_encoding *list; - int size; + const mbfl_encoding **list; + size_t size; - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { + if (!new_value) { if (MBSTRG(detect_order_list)) { - free(MBSTRG(detect_order_list)); + pefree(MBSTRG(detect_order_list), 1); } - MBSTRG(detect_order_list) = list; - MBSTRG(detect_order_list_size) = size; - } else { - if (MBSTRG(detect_order_list)) { - free(MBSTRG(detect_order_list)); - MBSTRG(detect_order_list) = NULL; - } + MBSTRG(detect_order_list) = NULL; + MBSTRG(detect_order_list_size) = 0; + return SUCCESS; + } + + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { return FAILURE; } + if (MBSTRG(detect_order_list)) { + pefree(MBSTRG(detect_order_list), 1); + } + MBSTRG(detect_order_list) = list; + MBSTRG(detect_order_list_size) = size; return SUCCESS; } /* }}} */ @@ -1004,24 +1229,28 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ static PHP_INI_MH(OnUpdate_mbstring_http_input) { - enum mbfl_no_encoding *list; - int size; + const mbfl_encoding **list; + size_t size; - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { + if (!new_value) { if (MBSTRG(http_input_list)) { - free(MBSTRG(http_input_list)); + pefree(MBSTRG(http_input_list), 1); } - MBSTRG(http_input_list) = list; - MBSTRG(http_input_list_size) = size; - } else { - if (MBSTRG(http_input_list)) { - free(MBSTRG(http_input_list)); - MBSTRG(http_input_list) = NULL; - } + MBSTRG(http_input_list) = NULL; MBSTRG(http_input_list_size) = 0; + return SUCCESS; + } + + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { return FAILURE; } + if (MBSTRG(http_input_list)) { + pefree(MBSTRG(http_input_list), 1); + } + MBSTRG(http_input_list) = list; + MBSTRG(http_input_list_size) = size; + return SUCCESS; } /* }}} */ @@ -1029,20 +1258,23 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ static PHP_INI_MH(OnUpdate_mbstring_http_output) { - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; - no_encoding = mbfl_name2no_encoding(new_value); - if (no_encoding != mbfl_no_encoding_invalid) { - MBSTRG(http_output_encoding) = no_encoding; - MBSTRG(current_http_output_encoding) = no_encoding; - } else { - MBSTRG(http_output_encoding) = mbfl_no_encoding_pass; - MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass; - if (new_value != NULL && new_value_length > 0) { - return FAILURE; - } + if (new_value == NULL || new_value_length == 0) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return SUCCESS; } + encoding = mbfl_name2encoding(new_value); + if (!encoding) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return FAILURE; + } + + MBSTRG(http_output_encoding) = encoding; + MBSTRG(current_http_output_encoding) = encoding; return SUCCESS; } /* }}} */ @@ -1050,62 +1282,44 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC) { - enum mbfl_no_encoding no_encoding; - const char *enc_name = NULL; - uint enc_name_len = 0; - - no_encoding = new_value ? mbfl_name2no_encoding(new_value): - mbfl_no_encoding_invalid; - if (no_encoding != mbfl_no_encoding_invalid) { - enc_name = new_value; - enc_name_len = new_value_length; - } else { + const mbfl_encoding *encoding; + + if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) { switch (MBSTRG(language)) { case mbfl_no_language_uni: - enc_name = "UTF-8"; - enc_name_len = sizeof("UTF-8") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); break; case mbfl_no_language_japanese: - enc_name = "EUC-JP"; - enc_name_len = sizeof("EUC-JP") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp); break; case mbfl_no_language_korean: - enc_name = "EUC-KR"; - enc_name_len = sizeof("EUC-KR") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr); break; case mbfl_no_language_simplified_chinese: - enc_name = "EUC-CN"; - enc_name_len = sizeof("EUC-CN") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn); break; case mbfl_no_language_traditional_chinese: - enc_name = "EUC-TW"; - enc_name_len = sizeof("EUC-TW") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw); break; case mbfl_no_language_russian: - enc_name = "KOI8-R"; - enc_name_len = sizeof("KOI8-R") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r); break; case mbfl_no_language_german: - enc_name = "ISO-8859-15"; - enc_name_len = sizeof("ISO-8859-15") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15); break; case mbfl_no_language_armenian: - enc_name = "ArmSCII-8"; - enc_name_len = sizeof("ArmSCII-8") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8); break; case mbfl_no_language_turkish: - enc_name = "ISO-8859-9"; - enc_name_len = sizeof("ISO-8859-9") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9); break; default: - enc_name = "ISO-8859-1"; - enc_name_len = sizeof("ISO-8859-1") - 1; + encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1); break; } - no_encoding = mbfl_name2no_encoding(enc_name); } - MBSTRG(internal_encoding) = no_encoding; - MBSTRG(current_internal_encoding) = no_encoding; + MBSTRG(internal_encoding) = encoding; + MBSTRG(current_internal_encoding) = encoding; #if HAVE_MBREGEX { const char *enc_name = new_value; @@ -1124,6 +1338,9 @@ int _php_mb_ini_mbstring_internal_encoding_set(const c /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) { + if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) { + return FAILURE; + } if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN || stage == PHP_INI_STAGE_RUNTIME) { return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); @@ -1139,32 +1356,6 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) } /* }}} */ -#ifdef ZEND_MULTIBYTE -/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */ -static PHP_INI_MH(OnUpdate_mbstring_script_encoding) -{ - int *list, size; - - if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { - if (MBSTRG(script_encoding_list) != NULL) { - free(MBSTRG(script_encoding_list)); - } - MBSTRG(script_encoding_list) = list; - MBSTRG(script_encoding_list_size) = size; - } else { - if (MBSTRG(script_encoding_list) != NULL) { - free(MBSTRG(script_encoding_list)); - } - MBSTRG(script_encoding_list) = NULL; - MBSTRG(script_encoding_list_size) = 0; - return FAILURE; - } - - return SUCCESS; -} -/* }}} */ -#endif /* ZEND_MULTIBYTE */ - /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) { @@ -1207,7 +1398,7 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_charact static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) { if (new_value == NULL) { - return FAILURE; + return FAILURE; } OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC); @@ -1261,10 +1452,7 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order) PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) - PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding) -#ifdef ZEND_MULTIBYTE - PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding) -#endif /* ZEND_MULTIBYTE */ + STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) @@ -1289,19 +1477,15 @@ PHP_INI_END() static PHP_GINIT_FUNCTION(mbstring) { mbstring_globals->language = mbfl_no_language_uni; - mbstring_globals->internal_encoding = mbfl_no_encoding_invalid; + mbstring_globals->internal_encoding = NULL; mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; -#ifdef ZEND_MULTIBYTE - mbstring_globals->script_encoding_list = NULL; - mbstring_globals->script_encoding_list_size = 0; -#endif /* ZEND_MULTIBYTE */ - mbstring_globals->http_output_encoding = mbfl_no_encoding_pass; - mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass; - mbstring_globals->http_input_identify = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid; - mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid; + mbstring_globals->http_output_encoding = &mbfl_encoding_pass; + mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass; + mbstring_globals->http_input_identify = NULL; + mbstring_globals->http_input_identify_get = NULL; + mbstring_globals->http_input_identify_post = NULL; + mbstring_globals->http_input_identify_cookie = NULL; + mbstring_globals->http_input_identify_string = NULL; mbstring_globals->http_input_list = NULL; mbstring_globals->http_input_list_size = 0; mbstring_globals->detect_order_list = NULL; @@ -1332,11 +1516,6 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring) if (mbstring_globals->http_input_list) { free(mbstring_globals->http_input_list); } -#ifdef ZEND_MULTIBYTE - if (mbstring_globals->script_encoding_list) { - free(mbstring_globals->script_encoding_list); - } -#endif /* ZEND_MULTIBYTE */ if (mbstring_globals->detect_order_list) { free(mbstring_globals->detect_order_list); } @@ -1357,7 +1536,7 @@ PHP_MINIT_FUNCTION(mbstring) REGISTER_INI_ENTRIES(); /* This is a global handler. Should not be set in a per-request handler. */ - sapi_register_treat_data(mbstr_treat_data); + sapi_register_treat_data(mbstr_treat_data TSRMLS_CC); /* Post handlers are stored in the thread-local context. */ if (MBSTRG(encoding_translation)) { @@ -1375,6 +1554,19 @@ PHP_MINIT_FUNCTION(mbstring) #if HAVE_MBREGEX PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif + + if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) { + return FAILURE; + } + + php_rfc1867_set_multibyte_callbacks( + php_mb_encoding_translation, + php_mb_gpc_get_detect_order, + php_mb_gpc_set_input_encoding, + php_mb_rfc1867_getword, + php_mb_rfc1867_getword_conf, + php_mb_rfc1867_basename); + return SUCCESS; } /* }}} */ @@ -1383,7 +1575,7 @@ PHP_MINIT_FUNCTION(mbstring) PHP_MSHUTDOWN_FUNCTION(mbstring) { UNREGISTER_INI_ENTRIES(); - + #if HAVE_MBREGEX PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif @@ -1395,8 +1587,6 @@ PHP_MSHUTDOWN_FUNCTION(mbstring) /* {{{ PHP_RINIT_FUNCTION(mbstring) */ PHP_RINIT_FUNCTION(mbstring) { - int n; - enum mbfl_no_encoding *list=NULL, *entry; zend_function *func, *orig; const struct mb_overload_def *p; @@ -1407,22 +1597,7 @@ PHP_RINIT_FUNCTION(mbstring) MBSTRG(illegalchars) = 0; - n = 0; - if (MBSTRG(detect_order_list)) { - list = MBSTRG(detect_order_list); - n = MBSTRG(detect_order_list_size); - } - if (n <= 0) { - list = MBSTRG(default_detect_order_list); - n = MBSTRG(default_detect_order_list_size); - } - entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0); - MBSTRG(current_detect_order_list) = entry; - MBSTRG(current_detect_order_list_size) = n; - while (n > 0) { - *entry++ = *list++; - n--; - } + php_mb_populate_current_detect_order_list(TSRMLS_C); /* override original function. */ if (MBSTRG(func_overload)){ @@ -1454,10 +1629,7 @@ PHP_RINIT_FUNCTION(mbstring) #if HAVE_MBREGEX PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif -#ifdef ZEND_MULTIBYTE - zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); - php_mb_set_zend_encoding(TSRMLS_C); -#endif /* ZEND_MULTIBYTE */ + zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC); return SUCCESS; } @@ -1481,11 +1653,11 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) } /* clear http input identification. */ - MBSTRG(http_input_identify) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid; - MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid; + MBSTRG(http_input_identify) = NULL; + MBSTRG(http_input_identify_post) = NULL; + MBSTRG(http_input_identify_get) = NULL; + MBSTRG(http_input_identify_cookie) = NULL; + MBSTRG(http_input_identify_string) = NULL; /* clear overloaded function. */ if (MBSTRG(func_overload)){ @@ -1516,7 +1688,12 @@ PHP_MINFO_FUNCTION(mbstring) php_info_print_table_start(); php_info_print_table_row(2, "Multibyte Support", "enabled"); php_info_print_table_row(2, "Multibyte string engine", "libmbfl"); - php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled"); + php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled"); + { + char tmp[256]; + snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY); + php_info_print_table_row(2, "libmbfl version", tmp); + } php_info_print_table_end(); php_info_print_table_start(); @@ -1560,33 +1737,27 @@ PHP_FUNCTION(mb_language) Sets the current internal encoding or Returns the current internal encoding as a string */ PHP_FUNCTION(mb_internal_encoding) { - char *name = NULL; + const char *name = NULL; int name_len; - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { RETURN_FALSE; } if (name == NULL) { - name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); + name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL; if (name != NULL) { RETURN_STRING(name, 1); } else { RETURN_FALSE; } } else { - no_encoding = mbfl_name2no_encoding(name); - if (no_encoding == mbfl_no_encoding_invalid) { + encoding = mbfl_name2encoding(name); + if (!encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { - MBSTRG(current_internal_encoding) = no_encoding; -#ifdef ZEND_MULTIBYTE - /* TODO: make independent from mbstring.encoding_translation? */ - if (MBSTRG(encoding_translation)) { - zend_multibyte_set_internal_encoding(name TSRMLS_CC); - } -#endif /* ZEND_MULTIBYTE */ + MBSTRG(current_internal_encoding) = encoding; RETURN_TRUE; } } @@ -1599,10 +1770,9 @@ PHP_FUNCTION(mb_http_input) { char *typ = NULL; int typ_len; - int retname, n; - char *name, *list, *temp; - enum mbfl_no_encoding *entry; - enum mbfl_no_encoding result = mbfl_no_encoding_invalid; + int retname; + char *list, *temp; + const mbfl_encoding *result = NULL; retname = 1; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { @@ -1630,40 +1800,38 @@ PHP_FUNCTION(mb_http_input) break; case 'I': case 'i': - array_init(return_value); - entry = MBSTRG(http_input_list); - n = MBSTRG(http_input_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); + { + const mbfl_encoding **entry = MBSTRG(http_input_list); + const size_t n = MBSTRG(http_input_list_size); + size_t i; + array_init(return_value); + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); + entry++; } - entry++; - n--; + retname = 0; } - retname = 0; break; case 'L': case 'l': - entry = MBSTRG(http_input_list); - n = MBSTRG(http_input_list_size); - list = NULL; - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { + { + const mbfl_encoding **entry = MBSTRG(http_input_list); + const size_t n = MBSTRG(http_input_list_size); + size_t i; + list = NULL; + for (i = 0; i < n; i++) { if (list) { temp = list; - spprintf(&list, 0, "%s,%s", temp, name); + spprintf(&list, 0, "%s,%s", temp, (*entry)->name); efree(temp); if (!list) { break; } } else { - list = estrdup(name); + list = estrdup((*entry)->name); } + entry++; } - entry++; - n--; } if (!list) { RETURN_FALSE; @@ -1678,9 +1846,8 @@ PHP_FUNCTION(mb_http_input) } if (retname) { - if (result != mbfl_no_encoding_invalid && - (name = (char *)mbfl_no_encoding2name(result)) != NULL) { - RETVAL_STRING(name, 1); + if (result) { + RETVAL_STRING(result->name, 1); } else { RETVAL_FALSE; } @@ -1692,28 +1859,28 @@ PHP_FUNCTION(mb_http_input) Sets the current output_encoding or returns the current output_encoding as a string */ PHP_FUNCTION(mb_http_output) { - char *name = NULL; + const char *name = NULL; int name_len; - enum mbfl_no_encoding no_encoding; + const mbfl_encoding *encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) { RETURN_FALSE; } if (name == NULL) { - name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding)); + name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL; if (name != NULL) { RETURN_STRING(name, 1); } else { RETURN_FALSE; } } else { - no_encoding = mbfl_name2no_encoding(name); - if (no_encoding == mbfl_no_encoding_invalid) { + encoding = mbfl_name2encoding(name); + if (!encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { - MBSTRG(current_http_output_encoding) = no_encoding; + MBSTRG(current_http_output_encoding) = encoding; RETURN_TRUE; } } @@ -1725,32 +1892,26 @@ PHP_FUNCTION(mb_http_output) PHP_FUNCTION(mb_detect_order) { zval **arg1 = NULL; - int n, size; - enum mbfl_no_encoding *list, *entry; - char *name; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { return; } if (!arg1) { + size_t i; + size_t n = MBSTRG(current_detect_order_list_size); + const mbfl_encoding **entry = MBSTRG(current_detect_order_list); array_init(return_value); - entry = MBSTRG(current_detect_order_list); - n = MBSTRG(current_detect_order_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); entry++; - n--; } } else { - list = NULL; - size = 0; + const mbfl_encoding **list = NULL; + size_t size = 0; switch (Z_TYPE_PP(arg1)) { case IS_ARRAY: - if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); } @@ -1759,7 +1920,7 @@ PHP_FUNCTION(mb_detect_order) break; default: convert_to_string_ex(arg1); - if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); } @@ -1879,15 +2040,15 @@ PHP_FUNCTION(mb_parse_str) char *encstr = NULL; int encstr_len; php_mb_encoding_handler_info_t info; - enum mbfl_no_encoding detected; + const mbfl_encoding *detected; track_vars_array = NULL; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) { return; } - /* Clear out the array */ if (track_vars_array != NULL) { + /* Clear out the array */ zval_dtor(track_vars_array); array_init(track_vars_array); } @@ -1896,7 +2057,6 @@ PHP_FUNCTION(mb_parse_str) info.data_type = PARSE_STRING; info.separator = PG(arg_separator).input; - info.force_register_globals = (track_vars_array == NULL); info.report_errors = 1; info.to_encoding = MBSTRG(current_internal_encoding); info.to_language = MBSTRG(language); @@ -1904,11 +2064,20 @@ PHP_FUNCTION(mb_parse_str) info.num_from_encodings = MBSTRG(http_input_list_size); info.from_language = MBSTRG(language); - detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC); + if (track_vars_array != NULL) { + detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC); + } else { + zval tmp; + if (!EG(active_symbol_table)) { + zend_rebuild_symbol_table(TSRMLS_C); + } + Z_ARRVAL(tmp) = EG(active_symbol_table); + detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC); + } MBSTRG(http_input_identify) = detected; - RETVAL_BOOL(detected != mbfl_no_encoding_invalid); + RETVAL_BOOL(detected); if (encstr != NULL) efree(encstr); } @@ -1924,7 +2093,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_string string, result; const char *charset; char *p; - enum mbfl_no_encoding encoding; + const mbfl_encoding *encoding; int last_feed, len; unsigned char send_text_mimetype = 0; char *s, *mimetype = NULL; @@ -1943,7 +2112,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_buffer_converter_delete(MBSTRG(outconv)); MBSTRG(outconv) = NULL; } - if (encoding == mbfl_no_encoding_pass) { + if (encoding == &mbfl_encoding_pass) { RETURN_STRINGL(arg_string, arg_string_len, 1); } @@ -1965,7 +2134,7 @@ PHP_FUNCTION(mb_output_handler) /* if content-type is not yet set, set it and activate the converter */ if (SG(sapi_headers).send_default_content_type || send_text_mimetype) { - charset = mbfl_no2preferred_mime_name(encoding); + charset = encoding->mime_name; if (charset) { len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset ); if (sapi_add_header(p, len, 0) != FAILURE) { @@ -1973,7 +2142,7 @@ PHP_FUNCTION(mb_output_handler) } } /* activate the converter */ - MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0); + MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0); if (send_text_mimetype){ efree(mimetype); } @@ -1994,7 +2163,7 @@ PHP_FUNCTION(mb_output_handler) /* feed the string */ mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)arg_string; string.len = arg_string_len; mbfl_buffer_converter_feed(MBSTRG(outconv), &string); @@ -2031,7 +2200,7 @@ PHP_FUNCTION(mb_strlen) string.no_language = MBSTRG(language); if (enc_name == NULL) { - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; } else { string.no_encoding = mbfl_name2no_encoding(enc_name); if (string.no_encoding == mbfl_no_encoding_invalid) { @@ -2058,13 +2227,13 @@ PHP_FUNCTION(mb_strpos) mbfl_string haystack, needle; char *enc_name = NULL; int enc_name_len; - + mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; offset = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) { @@ -2129,9 +2298,9 @@ PHP_FUNCTION(mb_strrpos) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2218,7 +2387,7 @@ PHP_FUNCTION(mb_stripos) int n; long offset; mbfl_string haystack, needle; - char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; int from_encoding_len; n = -1; offset = 0; @@ -2247,7 +2416,7 @@ PHP_FUNCTION(mb_strripos) int n; long offset; mbfl_string haystack, needle; - const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; int from_encoding_len; n = -1; offset = 0; @@ -2279,9 +2448,9 @@ PHP_FUNCTION(mb_strstr) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2337,9 +2506,9 @@ PHP_FUNCTION(mb_strrchr) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { RETURN_FALSE; @@ -2392,13 +2561,13 @@ PHP_FUNCTION(mb_stristr) unsigned int from_encoding_len, len, mblen; int n; mbfl_string haystack, needle, result, *ret = NULL; - const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { @@ -2450,13 +2619,13 @@ PHP_FUNCTION(mb_strrichr) zend_bool part = 0; int n, from_encoding_len, len, mblen; mbfl_string haystack, needle, result, *ret = NULL; - char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->name; mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { @@ -2508,9 +2677,9 @@ PHP_FUNCTION(mb_substr_count) mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) { return; @@ -2546,15 +2715,16 @@ PHP_FUNCTION(mb_substr) char *str, *encoding; long from, len; int mblen, str_len, encoding_len; + zval **z_len = NULL; mbfl_string string, result, *ret; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) { return; } mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (argc == 4) { string.no_encoding = mbfl_name2no_encoding(encoding); @@ -2567,8 +2737,11 @@ PHP_FUNCTION(mb_substr) string.val = (unsigned char *)str; string.len = str_len; - if (argc < 3) { + if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) { len = str_len; + } else { + convert_to_long_ex(z_len); + len = Z_LVAL_PP(z_len); } /* measures length */ @@ -2619,13 +2792,14 @@ PHP_FUNCTION(mb_strcut) char *encoding; long from, len; int encoding_len; + zval **z_len = NULL; mbfl_string string, result, *ret; mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) { return; } @@ -2637,8 +2811,11 @@ PHP_FUNCTION(mb_strcut) } } - if (argc < 3) { + if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) { len = string.len; + } else { + convert_to_long_ex(z_len); + len = Z_LVAL_PP(z_len); } /* if "from" position is negative, count start position from the end @@ -2686,7 +2863,7 @@ PHP_FUNCTION(mb_strwidth) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { return; @@ -2725,9 +2902,9 @@ PHP_FUNCTION(mb_strimwidth) mbfl_string_init(&string); mbfl_string_init(&marker); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; marker.no_language = MBSTRG(language); - marker.no_encoding = MBSTRG(current_internal_encoding); + marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; marker.val = NULL; marker.len = 0; @@ -2771,9 +2948,10 @@ PHP_FUNCTION(mb_strimwidth) MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC) { mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_buffer_converter *convd; - int size, *list; + size_t size; + const mbfl_encoding **list; char *output=NULL; if (output_len) { @@ -2784,8 +2962,8 @@ MBSTRING_API char * php_mb_convert_encoding(const char } /* new encoding */ if (_to_encoding && strlen(_to_encoding)) { - to_encoding = mbfl_name2no_encoding(_to_encoding); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(_to_encoding); + if (!to_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding); return NULL; } @@ -2797,7 +2975,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char mbfl_string_init(&string); mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); string.val = (unsigned char *)input; string.len = length; @@ -2806,20 +2984,20 @@ MBSTRING_API char * php_mb_convert_encoding(const char if (_from_encodings) { list = NULL; size = 0; - php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); + php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); if (size == 1) { from_encoding = *list; - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; } else if (size > 1) { /* auto detect */ - from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection)); - if (from_encoding != mbfl_no_encoding_invalid) { - string.no_encoding = from_encoding; + from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection)); + if (from_encoding) { + string.no_encoding = from_encoding->no_encoding; } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding"); - from_encoding = mbfl_no_encoding_pass; + from_encoding = &mbfl_encoding_pass; to_encoding = from_encoding; - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; } } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified"); @@ -2830,7 +3008,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char } /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter"); return NULL; @@ -2931,7 +3109,8 @@ PHP_FUNCTION(mb_convert_encoding) Returns a case-folded version of sourcestring */ PHP_FUNCTION(mb_convert_case) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; long case_mode = 0; char *newstr; @@ -2955,7 +3134,8 @@ PHP_FUNCTION(mb_convert_case) */ PHP_FUNCTION(mb_strtoupper) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; char *newstr; size_t ret_len; @@ -2978,7 +3158,8 @@ PHP_FUNCTION(mb_strtoupper) */ PHP_FUNCTION(mb_strtolower) { - char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); + const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; + char *str; int str_len, from_encoding_len; char *newstr; size_t ret_len; @@ -3006,9 +3187,9 @@ PHP_FUNCTION(mb_detect_encoding) zval *encoding_list; mbfl_string string; - const char *ret; - enum mbfl_no_encoding *elist; - int size, *list; + const mbfl_encoding *ret; + const mbfl_encoding **elist, **list; + size_t size; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) { return; @@ -3020,7 +3201,7 @@ PHP_FUNCTION(mb_detect_encoding) if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) { switch (Z_TYPE_P(encoding_list)) { case IS_ARRAY: - if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); list = NULL; @@ -3030,7 +3211,7 @@ PHP_FUNCTION(mb_detect_encoding) break; default: convert_to_string(encoding_list); - if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { + if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { if (list) { efree(list); list = NULL; @@ -3059,7 +3240,7 @@ PHP_FUNCTION(mb_detect_encoding) string.no_language = MBSTRG(language); string.val = (unsigned char *)str; string.len = str_len; - ret = mbfl_identify_encoding_name(&string, elist, size, strict); + ret = mbfl_identify_encoding2(&string, elist, size, strict); if (list != NULL) { efree((void *)list); @@ -3069,7 +3250,7 @@ PHP_FUNCTION(mb_detect_encoding) RETURN_FALSE; } - RETVAL_STRING((char *)ret, 1); + RETVAL_STRING((char *)ret->name, 1); } /* }}} */ @@ -3134,7 +3315,7 @@ PHP_FUNCTION(mb_encode_mimeheader) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { return; @@ -3168,7 +3349,7 @@ PHP_FUNCTION(mb_encode_mimeheader) mbfl_string_init(&result); ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent); if (ret != NULL) { - RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */ + RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ } else { RETVAL_FALSE; } @@ -3183,16 +3364,16 @@ PHP_FUNCTION(mb_decode_mimeheader) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) { return; } mbfl_string_init(&result); - ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)); + ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding); if (ret != NULL) { - RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */ + RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ } else { RETVAL_FALSE; } @@ -3212,7 +3393,7 @@ PHP_FUNCTION(mb_convert_kana) mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) { return; @@ -3311,21 +3492,22 @@ PHP_FUNCTION(mb_convert_variables) zval ***args, ***stack, **var, **hash_entry, **zfrom_enc; HashTable *target_hash; mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; + const mbfl_encoding *from_encoding, *to_encoding; mbfl_encoding_detector *identd; mbfl_buffer_converter *convd; - int n, to_enc_len, argc, stack_level, stack_max, elistsz; - enum mbfl_no_encoding *elist; - char *name, *to_enc; + int n, to_enc_len, argc, stack_level, stack_max; + size_t elistsz; + const mbfl_encoding **elist; + char *to_enc; void *ptmp; - + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { return; } /* new encoding */ - to_encoding = mbfl_name2no_encoding(to_enc); - if (to_encoding == mbfl_no_encoding_invalid) { + to_encoding = mbfl_name2encoding(to_enc); + if (!to_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc); efree(args); RETURN_FALSE; @@ -3335,7 +3517,7 @@ PHP_FUNCTION(mb_convert_variables) mbfl_string_init(&string); mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.no_encoding = from_encoding; + string.no_encoding = from_encoding->no_encoding; string.no_language = MBSTRG(language); /* pre-conversion encoding */ @@ -3351,16 +3533,16 @@ PHP_FUNCTION(mb_convert_variables) break; } if (elistsz <= 0) { - from_encoding = mbfl_no_encoding_pass; + from_encoding = &mbfl_encoding_pass; } else if (elistsz == 1) { from_encoding = *elist; } else { /* auto detect */ - from_encoding = mbfl_no_encoding_invalid; + from_encoding = NULL; stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0); stack_level = 0; - identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection)); + identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection)); if (identd != NULL) { n = 0; while (n < argc || stack_level > 0) { @@ -3413,14 +3595,14 @@ PHP_FUNCTION(mb_convert_variables) } } detect_end: - from_encoding = mbfl_encoding_detector_judge(identd); + from_encoding = mbfl_encoding_detector_judge2(identd); mbfl_encoding_detector_delete(identd); } efree(stack); - if (from_encoding == mbfl_no_encoding_invalid) { + if (!from_encoding) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); - from_encoding = mbfl_no_encoding_pass; + from_encoding = &mbfl_encoding_pass; } } if (elist != NULL) { @@ -3428,8 +3610,8 @@ detect_end: } /* create converter */ convd = NULL; - if (from_encoding != mbfl_no_encoding_pass) { - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); + if (from_encoding != &mbfl_encoding_pass) { + convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); RETURN_FALSE; @@ -3511,9 +3693,8 @@ detect_end: efree(args); - name = (char *)mbfl_no_encoding2name(from_encoding); - if (name != NULL) { - RETURN_STRING(name, 1); + if (from_encoding) { + RETURN_STRING(from_encoding->name, 1); } else { RETURN_FALSE; } @@ -3531,21 +3712,22 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS HashTable *target_hash; size_t argc = ZEND_NUM_ARGS(); int i, *convmap, *mapelm, mapsize=0; + zend_bool is_hex = 0; mbfl_string string, result, *ret; enum mbfl_no_encoding no_encoding; - if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) { + if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) { return; } mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; string.val = (unsigned char *)str; string.len = str_len; /* encoding */ - if (argc == 3) { + if ((argc == 3 || argc == 4) && encoding_len > 0) { no_encoding = mbfl_name2no_encoding(encoding); if (no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); @@ -3555,6 +3737,12 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS } } + if (argc == 4) { + if (type == 0 && is_hex) { + type = 2; /* output in hex format */ + } + } + /* conversion map */ convmap = NULL; if (Z_TYPE_P(zconvmap) == IS_ARRAY) { @@ -3592,7 +3780,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS } /* }}} */ -/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding]) +/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]]) Converts specified characters to HTML numeric entities */ PHP_FUNCTION(mb_encode_numericentity) { @@ -3852,9 +4040,9 @@ PHP_FUNCTION(mb_send_mail) mbfl_string orig_str, conv_str; mbfl_string *pstr; /* pointer to mbfl string for return value */ enum mbfl_no_encoding - tran_cs, /* transfar text charset */ - head_enc, /* header transfar encoding */ - body_enc; /* body transfar encoding */ + tran_cs, /* transfar text charset */ + head_enc, /* header transfar encoding */ + body_enc; /* body transfar encoding */ mbfl_memory_device device; /* automatic allocateable buffer for additional header */ const mbfl_language *lang; int err = 0; @@ -3862,12 +4050,7 @@ PHP_FUNCTION(mb_send_mail) smart_str *s; extern void mbfl_memory_device_unput(mbfl_memory_device *device); char *pp, *ee; - - if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE."); - RETURN_FALSE; - } - + /* initialize */ mbfl_memory_device_init(&device, 0, 0); mbfl_string_init(&orig_str); @@ -3966,15 +4149,15 @@ PHP_FUNCTION(mb_send_mail) /* To: */ if (to != NULL) { - if (to_len > 0) { - to_r = estrndup(to, to_len); - for (; to_len; to_len--) { - if (!isspace((unsigned char) to_r[to_len - 1])) { - break; - } - to_r[to_len - 1] = '\0'; - } - for (i = 0; to_r[i]; i++) { + if (to_len > 0) { + to_r = estrndup(to, to_len); + for (; to_len; to_len--) { + if (!isspace((unsigned char) to_r[to_len - 1])) { + break; + } + to_r[to_len - 1] = '\0'; + } + for (i = 0; to_r[i]; i++) { if (iscntrl((unsigned char) to_r[i])) { /* According to RFC 822, section 3.1.1 long headers may be separated into * parts using CRLF followed at least one linear-white-space character ('\t' or ' '). @@ -3984,11 +4167,11 @@ PHP_FUNCTION(mb_send_mail) SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i); to_r[i] = ' '; } - } - } else { - to_r = to; - } - } else { + } + } else { + to_r = to; + } + } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field"); err = 1; } @@ -3998,10 +4181,10 @@ PHP_FUNCTION(mb_send_mail) orig_str.no_language = MBSTRG(language); orig_str.val = (unsigned char *)subject; orig_str.len = subject_len; - orig_str.no_encoding = MBSTRG(current_internal_encoding); - if (orig_str.no_encoding == mbfl_no_encoding_invalid - || orig_str.no_encoding == mbfl_no_encoding_pass) { - orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { + const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid; } pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]")); if (pstr != NULL) { @@ -4017,11 +4200,11 @@ PHP_FUNCTION(mb_send_mail) orig_str.no_language = MBSTRG(language); orig_str.val = (unsigned char *)message; orig_str.len = (unsigned int)message_len; - orig_str.no_encoding = MBSTRG(current_internal_encoding); + orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (orig_str.no_encoding == mbfl_no_encoding_invalid - || orig_str.no_encoding == mbfl_no_encoding_pass) { - orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { + const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid; } pstr = NULL; @@ -4129,15 +4312,13 @@ PHP_FUNCTION(mb_send_mail) PHP_FUNCTION(mb_get_info) { char *typ = NULL; - int typ_len, n; + int typ_len; + size_t n; char *name; const struct mb_overload_def *over_func; zval *row1, *row2; const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); - enum mbfl_no_encoding *entry; -#ifdef ZEND_MULTIBYTE - zval *row3; -#endif /* ZEND_MULTIBYTE */ + const mbfl_encoding **entry; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { RETURN_FALSE; @@ -4145,14 +4326,14 @@ PHP_FUNCTION(mb_get_info) if (!typ || !strcasecmp("all", typ)) { array_init(return_value); - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { - add_assoc_string(return_value, "internal_encoding", name, 1); + if (MBSTRG(current_internal_encoding)) { + add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { - add_assoc_string(return_value, "http_input", name, 1); + if (MBSTRG(http_input_identify)) { + add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { - add_assoc_string(return_value, "http_output", name, 1); + if (MBSTRG(current_http_output_encoding)) { + add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1); } if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1); @@ -4194,15 +4375,13 @@ PHP_FUNCTION(mb_get_info) } n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); - if(n > 0) { + if (n > 0) { + size_t i; MAKE_STD_ZVAL(row2); array_init(row2); - while (n > 0) { - if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row2, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(row2, (*entry)->name, 1); entry++; - n--; } add_assoc_zval(return_value, "detect_order", row2); } @@ -4220,33 +4399,17 @@ PHP_FUNCTION(mb_get_info) } else { add_assoc_string(return_value, "strict_detection", "Off", 1); } -#ifdef ZEND_MULTIBYTE - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - MAKE_STD_ZVAL(row3); - array_init(row3); - while (n > 0) { - if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row3, name, 1); - } - entry++; - n--; - } - add_assoc_zval(return_value, "script_encoding", row3); - } -#endif /* ZEND_MULTIBYTE */ } else if (!strcasecmp("internal_encoding", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(current_internal_encoding)) { + RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1); } } else if (!strcasecmp("http_input", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(http_input_identify)) { + RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1); } } else if (!strcasecmp("http_output", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { - RETVAL_STRING(name, 1); + if (MBSTRG(current_http_output_encoding)) { + RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1); } } else if (!strcasecmp("http_output_conv_mimetypes", typ)) { if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { @@ -4294,15 +4457,12 @@ PHP_FUNCTION(mb_get_info) } else if (!strcasecmp("detect_order", typ)) { n = MBSTRG(current_detect_order_list_size); entry = MBSTRG(current_detect_order_list); - if(n > 0) { + if (n > 0) { + size_t i; array_init(return_value); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } + for (i = 0; i < n; i++) { + add_next_index_string(return_value, (*entry)->name, 1); entry++; - n--; } } } else if (!strcasecmp("substitute_character", typ)) { @@ -4322,24 +4482,6 @@ PHP_FUNCTION(mb_get_info) RETVAL_STRING("Off", 1); } } else { -#ifdef ZEND_MULTIBYTE - if (!strcasecmp("script_encoding", typ)) { - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - array_init(return_value); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - add_next_index_string(return_value, name, 1); - } - entry++; - n--; - } - } - return; - } -#endif /* ZEND_MULTIBYTE */ RETURN_FALSE; } } @@ -4354,7 +4496,7 @@ PHP_FUNCTION(mb_check_encoding) char *enc = NULL; int enc_len; mbfl_buffer_converter *convd; - enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding); + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_string string, result, *ret = NULL; long illegalchars = 0; @@ -4367,23 +4509,23 @@ PHP_FUNCTION(mb_check_encoding) } if (enc != NULL) { - no_encoding = mbfl_name2no_encoding(enc); - if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) { + encoding = mbfl_name2encoding(enc); + if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc); RETURN_FALSE; } } - - convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0); + + convd = mbfl_buffer_converter_new2(encoding, encoding, 0); if (convd == NULL) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); RETURN_FALSE; } mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); mbfl_buffer_converter_illegal_substchar(convd, 0); - + /* initialize string */ - mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding); + mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); mbfl_string_init(&result); string.val = (unsigned char *)var; @@ -4402,9 +4544,37 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ -/* {{{ MBSTRING_API int php_mb_encoding_translation() */ -MBSTRING_API int php_mb_encoding_translation(TSRMLS_D) + +/* {{{ php_mb_populate_current_detect_order_list */ +static void php_mb_populate_current_detect_order_list(TSRMLS_D) { + const mbfl_encoding **entry = 0; + size_t nentries; + + if (MBSTRG(current_detect_order_list)) { + return; + } + + if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) { + nentries = MBSTRG(detect_order_list_size); + entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); + memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries); + } else { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + size_t i; + nentries = MBSTRG(default_detect_order_list_size); + entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); + for (i = 0; i < nentries; i++) { + entry[i] = mbfl_no2encoding(src[i]); + } + } + MBSTRG(current_detect_order_list) = entry; + MBSTRG(current_detect_order_list_size) = nentries; +} + +/* {{{ static int php_mb_encoding_translation() */ +static int php_mb_encoding_translation(TSRMLS_D) +{ return MBSTRG(encoding_translation); } /* }}} */ @@ -4430,8 +4600,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC) { - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); } /* }}} */ @@ -4479,175 +4648,10 @@ MBSTRING_API char *php_mb_safe_strrchr_ex(const char * /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC) { - return php_mb_safe_strrchr_ex(s, c, nbytes, - mbfl_no2encoding(MBSTRG(internal_encoding))); + return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding)); } /* }}} */ -/* {{{ MBSTRING_API char *php_mb_strrchr() */ -MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC) -{ - return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC); -} -/* }}} */ - -/* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */ -MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC) -{ - - if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){ - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(http_input_identify))); - } else { - return php_mb_mbchar_bytes_ex(s, - mbfl_no2encoding(MBSTRG(internal_encoding))); - } -} -/* }}} */ - -/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */ -MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC) -{ - int i; - mbfl_string string, result, *ret = NULL; - enum mbfl_no_encoding from_encoding, to_encoding; - mbfl_buffer_converter *convd; - - if (encoding_to) { - /* new encoding */ - to_encoding = mbfl_name2no_encoding(encoding_to); - if (to_encoding == mbfl_no_encoding_invalid) { - return -1; - } - } else { - to_encoding = MBSTRG(current_internal_encoding); - } - if (encoding_from) { - /* old encoding */ - from_encoding = mbfl_name2no_encoding(encoding_from); - if (from_encoding == mbfl_no_encoding_invalid) { - return -1; - } - } else { - from_encoding = MBSTRG(http_input_identify); - } - - if (from_encoding == mbfl_no_encoding_pass) { - return 0; - } - - /* initialize string */ - mbfl_string_init(&string); - mbfl_string_init(&result); - string.no_encoding = from_encoding; - string.no_language = MBSTRG(language); - - for (i=0; ival; - len[i] = (int)ret->len; - } - - MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); - mbfl_buffer_converter_delete(convd); - } - - return ret ? 0 : -1; -} -/* }}} */ - -/* {{{ MBSTRING_API int php_mb_gpc_encoding_detector() - */ -MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC) -{ - mbfl_string string; - enum mbfl_no_encoding *elist; - enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid; - mbfl_encoding_detector *identd = NULL; - - int size; - enum mbfl_no_encoding *list; - - if (MBSTRG(http_input_list_size) == 1 && - MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) { - MBSTRG(http_input_identify) = mbfl_no_encoding_pass; - return SUCCESS; - } - - if (MBSTRG(http_input_list_size) == 1 && - MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto && - mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) { - MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0]; - return SUCCESS; - } - - if (arg_list && strlen(arg_list)>0) { - /* make encoding list */ - list = NULL; - size = 0; - php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC); - - if (size > 0 && list != NULL) { - elist = list; - } else { - elist = MBSTRG(current_detect_order_list); - size = MBSTRG(current_detect_order_list_size); - if (size <= 0){ - elist = MBSTRG(default_detect_order_list); - size = MBSTRG(default_detect_order_list_size); - } - } - } else { - elist = MBSTRG(current_detect_order_list); - size = MBSTRG(current_detect_order_list_size); - if (size <= 0){ - elist = MBSTRG(default_detect_order_list); - size = MBSTRG(default_detect_order_list_size); - } - } - - mbfl_string_init(&string); - string.no_language = MBSTRG(language); - - identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection)); - - if (identd) { - int n = 0; - while(n < num){ - string.val = (unsigned char *)arg_string[n]; - string.len = arg_length[n]; - if (mbfl_encoding_detector_feed(identd, &string)) { - break; - } - n++; - } - encoding = mbfl_encoding_detector_judge(identd); - mbfl_encoding_detector_delete(identd); - } - - if (encoding != mbfl_no_encoding_invalid) { - MBSTRG(http_input_identify) = encoding; - return SUCCESS; - } else { - return FAILURE; - } -} -/* }}} */ - /* {{{ MBSTRING_API int php_mb_stripos() */ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC) @@ -4659,9 +4663,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char * mbfl_string_init(&haystack); mbfl_string_init(&needle); haystack.no_language = MBSTRG(language); - haystack.no_encoding = MBSTRG(current_internal_encoding); + haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); - needle.no_encoding = MBSTRG(current_internal_encoding); + needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; do { size_t len = 0; @@ -4725,165 +4729,18 @@ MBSTRING_API int php_mb_stripos(int mode, const char * } /* }}} */ -#ifdef ZEND_MULTIBYTE -/* {{{ php_mb_set_zend_encoding() */ -static int php_mb_set_zend_encoding(TSRMLS_D) +static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */ { - /* 'd better use mbfl_memory_device? */ - char *name, *list = NULL; - int n, *entry, list_size = 0; - zend_encoding_detector encoding_detector; - zend_encoding_converter encoding_converter; - zend_encoding_oddlen encoding_oddlen; - - /* notify script encoding to Zend Engine */ - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - while (n > 0) { - name = (char *)mbfl_no_encoding2name(*entry); - if (name) { - list_size += strlen(name) + 1; - if (!list) { - list = (char*)emalloc(list_size); - *list = '\0'; - } else { - list = (char*)erealloc(list, list_size); - strcat(list, ","); - } - strcat(list, name); - } - entry++; - n--; - } - zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC); - if (list) { - efree(list); - } - encoding_detector = php_mb_encoding_detector; - encoding_converter = php_mb_encoding_converter; - encoding_oddlen = php_mb_oddlen; - - /* TODO: make independent from mbstring.encoding_translation? */ - if (MBSTRG(encoding_translation)) { - /* notify internal encoding to Zend Engine */ - name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); - zend_multibyte_set_internal_encoding(name TSRMLS_CC); - } - - zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC); - - return 0; + *list = (const zend_encoding **)MBSTRG(http_input_list); + *list_size = MBSTRG(http_input_list_size); } /* }}} */ -/* {{{ char *php_mb_encoding_detector() - * Interface for Zend Engine - */ -static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC) +static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */ { - mbfl_string string; - const char *ret; - enum mbfl_no_encoding *elist; - int size, *list; - - /* make encoding list */ - list = NULL; - size = 0; - php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC); - if (size <= 0) { - return NULL; - } - if (size > 0 && list != NULL) { - elist = list; - } else { - elist = MBSTRG(current_detect_order_list); - size = MBSTRG(current_detect_order_list_size); - } - - mbfl_string_init(&string); - string.no_language = MBSTRG(language); - string.val = (unsigned char *)arg_string; - string.len = arg_length; - ret = mbfl_identify_encoding_name(&string, elist, size, 0); - if (list != NULL) { - efree((void *)list); - } - if (ret != NULL) { - return estrdup(ret); - } else { - return NULL; - } + MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding; } /* }}} */ - -/* {{{ int php_mb_encoding_converter() */ -static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC) -{ - mbfl_string string, result, *ret; - enum mbfl_no_encoding from_encoding, to_encoding; - mbfl_buffer_converter *convd; - - /* new encoding */ - to_encoding = mbfl_name2no_encoding(encoding_to); - if (to_encoding == mbfl_no_encoding_invalid) { - return -1; - } - /* old encoding */ - from_encoding = mbfl_name2no_encoding(encoding_from); - if (from_encoding == mbfl_no_encoding_invalid) { - return -1; - } - /* initialize string */ - mbfl_string_init(&string); - mbfl_string_init(&result); - string.no_encoding = from_encoding; - string.no_language = MBSTRG(language); - string.val = (unsigned char*)from; - string.len = from_length; - - /* initialize converter */ - convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); - if (convd == NULL) { - return -1; - } - mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); - mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); - - /* do it */ - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - if (ret != NULL) { - *to = ret->val; - *to_length = ret->len; - } - - MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); - mbfl_buffer_converter_delete(convd); - - return ret ? 0 : -1; -} -/* }}} */ - -/* {{{ int php_mb_oddlen() - * returns number of odd (e.g. appears only first byte of multibyte - * character) chars - */ -static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC) -{ - mbfl_string mb_string; - - mbfl_string_init(&mb_string); - mb_string.no_language = MBSTRG(language); - mb_string.no_encoding = mbfl_name2no_encoding(encoding); - mb_string.val = (unsigned char *)string; - mb_string.len = length; - - if (mb_string.no_encoding == mbfl_no_encoding_invalid) { - return 0; - } - return mbfl_oddlen(&mb_string); -} -/* }}} */ -#endif /* ZEND_MULTIBYTE */ #endif /* HAVE_MBSTRING */