Annotation of embedaddon/php/ext/mbstring/mbstring.c, revision 1.1.1.2
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16: | Rui Hirokawa <hirokawa@php.net> |
17: +----------------------------------------------------------------------+
18: */
19:
1.1.1.2 ! misho 20: /* $Id$ */
1.1 misho 21:
22: /*
23: * PHP 4 Multibyte String module "mbstring"
24: *
25: * History:
26: * 2000.5.19 Release php-4.0RC2_jstring-1.0
27: * 2001.4.1 Release php4_jstring-1.0.91
28: * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29: * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30: */
31:
32: /*
33: * PHP3 Internationalization support program.
34: *
35: * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36: * All rights reserved.
37: *
38: * See README_PHP3-i18n-ja for more detail.
39: *
40: * Authors:
41: * Hironori Sato <satoh@jpnnet.com>
42: * Shigeru Kanemoto <sgk@happysize.co.jp>
43: * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44: * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45: */
46:
47: /* {{{ includes */
48: #ifdef HAVE_CONFIG_H
49: #include "config.h"
50: #endif
51:
52: #include "php.h"
53: #include "php_ini.h"
54: #include "php_variables.h"
55: #include "mbstring.h"
56: #include "ext/standard/php_string.h"
57: #include "ext/standard/php_mail.h"
58: #include "ext/standard/exec.h"
59: #include "ext/standard/php_smart_str.h"
60: #include "ext/standard/url.h"
61: #include "main/php_output.h"
62: #include "ext/standard/info.h"
63:
64: #include "libmbfl/mbfl/mbfl_allocators.h"
1.1.1.2 ! misho 65: #include "libmbfl/mbfl/mbfilter_pass.h"
1.1 misho 66:
67: #include "php_variables.h"
68: #include "php_globals.h"
69: #include "rfc1867.h"
70: #include "php_content_types.h"
71: #include "SAPI.h"
72: #include "php_unicode.h"
73: #include "TSRM.h"
74:
75: #include "mb_gpc.h"
76:
77: #if HAVE_MBREGEX
78: #include "php_mbregex.h"
79: #endif
80:
81: #include "zend_multibyte.h"
82:
83: #if HAVE_ONIG
84: #include "php_onig_compat.h"
85: #include <oniguruma.h>
86: #undef UChar
87: #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88: #include "ext/pcre/php_pcre.h"
89: #endif
90: /* }}} */
91:
92: #if HAVE_MBSTRING
93:
94: /* {{{ prototypes */
95: ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96:
97: static PHP_GINIT_FUNCTION(mbstring);
98: static PHP_GSHUTDOWN_FUNCTION(mbstring);
99:
1.1.1.2 ! misho 100: static void php_mb_populate_current_detect_order_list(TSRMLS_D);
! 101:
! 102: static int php_mb_encoding_translation(TSRMLS_D);
! 103:
! 104: static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
! 105:
! 106: static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
! 107:
1.1 misho 108: /* }}} */
109:
110: /* {{{ php_mb_default_identify_list */
111: typedef struct _php_mb_nls_ident_list {
112: enum mbfl_no_language lang;
1.1.1.2 ! misho 113: const enum mbfl_no_encoding *list;
! 114: size_t list_size;
1.1 misho 115: } php_mb_nls_ident_list;
116:
117: static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118: mbfl_no_encoding_ascii,
119: mbfl_no_encoding_jis,
120: mbfl_no_encoding_utf8,
121: mbfl_no_encoding_euc_jp,
122: mbfl_no_encoding_sjis
123: };
124:
125: static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126: mbfl_no_encoding_ascii,
127: mbfl_no_encoding_utf8,
128: mbfl_no_encoding_euc_cn,
129: mbfl_no_encoding_cp936
130: };
131:
132: static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133: mbfl_no_encoding_ascii,
134: mbfl_no_encoding_utf8,
135: mbfl_no_encoding_euc_tw,
136: mbfl_no_encoding_big5
137: };
138:
139: static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140: mbfl_no_encoding_ascii,
141: mbfl_no_encoding_utf8,
142: mbfl_no_encoding_euc_kr,
143: mbfl_no_encoding_uhc
144: };
145:
146: static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147: mbfl_no_encoding_ascii,
148: mbfl_no_encoding_utf8,
149: mbfl_no_encoding_koi8r,
150: mbfl_no_encoding_cp1251,
151: mbfl_no_encoding_cp866
152: };
153:
154: static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155: mbfl_no_encoding_ascii,
156: mbfl_no_encoding_utf8,
157: mbfl_no_encoding_armscii8
158: };
159:
160: static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161: mbfl_no_encoding_ascii,
162: mbfl_no_encoding_utf8,
163: mbfl_no_encoding_cp1254,
164: mbfl_no_encoding_8859_9
165: };
166:
167: static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168: mbfl_no_encoding_ascii,
169: mbfl_no_encoding_utf8,
170: mbfl_no_encoding_koi8u
171: };
172:
173: static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174: mbfl_no_encoding_ascii,
175: mbfl_no_encoding_utf8
176: };
177:
178:
179: static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180: { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181: { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182: { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183: { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184: { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185: { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186: { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187: { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188: { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189: };
190:
191: /* }}} */
192:
193: /* {{{ mb_overload_def mb_ovld[] */
194: static const struct mb_overload_def mb_ovld[] = {
195: {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196: {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197: {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198: {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199: {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200: {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201: {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202: {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203: {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204: {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205: {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206: {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207: {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208: #if HAVE_MBREGEX
209: {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210: {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211: {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212: {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213: {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214: #endif
215: {0, NULL, NULL, NULL}
216: };
217: /* }}} */
218:
219: /* {{{ arginfo */
220: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221: ZEND_ARG_INFO(0, language)
222: ZEND_END_ARG_INFO()
223:
224: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225: ZEND_ARG_INFO(0, encoding)
226: ZEND_END_ARG_INFO()
227:
228: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229: ZEND_ARG_INFO(0, type)
230: ZEND_END_ARG_INFO()
231:
232: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233: ZEND_ARG_INFO(0, encoding)
234: ZEND_END_ARG_INFO()
235:
236: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237: ZEND_ARG_INFO(0, encoding)
238: ZEND_END_ARG_INFO()
239:
240: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241: ZEND_ARG_INFO(0, substchar)
242: ZEND_END_ARG_INFO()
243:
244: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245: ZEND_ARG_INFO(0, encoding)
246: ZEND_END_ARG_INFO()
247:
248: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249: ZEND_ARG_INFO(0, encoded_string)
250: ZEND_ARG_INFO(1, result)
251: ZEND_END_ARG_INFO()
252:
253: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254: ZEND_ARG_INFO(0, contents)
255: ZEND_ARG_INFO(0, status)
256: ZEND_END_ARG_INFO()
257:
258: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259: ZEND_ARG_INFO(0, str)
260: ZEND_ARG_INFO(0, encoding)
261: ZEND_END_ARG_INFO()
262:
263: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264: ZEND_ARG_INFO(0, haystack)
265: ZEND_ARG_INFO(0, needle)
266: ZEND_ARG_INFO(0, offset)
267: ZEND_ARG_INFO(0, encoding)
268: ZEND_END_ARG_INFO()
269:
270: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271: ZEND_ARG_INFO(0, haystack)
272: ZEND_ARG_INFO(0, needle)
273: ZEND_ARG_INFO(0, offset)
274: ZEND_ARG_INFO(0, encoding)
275: ZEND_END_ARG_INFO()
276:
277: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278: ZEND_ARG_INFO(0, haystack)
279: ZEND_ARG_INFO(0, needle)
280: ZEND_ARG_INFO(0, offset)
281: ZEND_ARG_INFO(0, encoding)
282: ZEND_END_ARG_INFO()
283:
284: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285: ZEND_ARG_INFO(0, haystack)
286: ZEND_ARG_INFO(0, needle)
287: ZEND_ARG_INFO(0, offset)
288: ZEND_ARG_INFO(0, encoding)
289: ZEND_END_ARG_INFO()
290:
291: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292: ZEND_ARG_INFO(0, haystack)
293: ZEND_ARG_INFO(0, needle)
294: ZEND_ARG_INFO(0, part)
295: ZEND_ARG_INFO(0, encoding)
296: ZEND_END_ARG_INFO()
297:
298: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299: ZEND_ARG_INFO(0, haystack)
300: ZEND_ARG_INFO(0, needle)
301: ZEND_ARG_INFO(0, part)
302: ZEND_ARG_INFO(0, encoding)
303: ZEND_END_ARG_INFO()
304:
305: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306: ZEND_ARG_INFO(0, haystack)
307: ZEND_ARG_INFO(0, needle)
308: ZEND_ARG_INFO(0, part)
309: ZEND_ARG_INFO(0, encoding)
310: ZEND_END_ARG_INFO()
311:
312: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313: ZEND_ARG_INFO(0, haystack)
314: ZEND_ARG_INFO(0, needle)
315: ZEND_ARG_INFO(0, part)
316: ZEND_ARG_INFO(0, encoding)
317: ZEND_END_ARG_INFO()
318:
319: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320: ZEND_ARG_INFO(0, haystack)
321: ZEND_ARG_INFO(0, needle)
322: ZEND_ARG_INFO(0, encoding)
323: ZEND_END_ARG_INFO()
324:
325: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326: ZEND_ARG_INFO(0, str)
327: ZEND_ARG_INFO(0, start)
328: ZEND_ARG_INFO(0, length)
329: ZEND_ARG_INFO(0, encoding)
330: ZEND_END_ARG_INFO()
331:
332: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333: ZEND_ARG_INFO(0, str)
334: ZEND_ARG_INFO(0, start)
335: ZEND_ARG_INFO(0, length)
336: ZEND_ARG_INFO(0, encoding)
337: ZEND_END_ARG_INFO()
338:
339: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340: ZEND_ARG_INFO(0, str)
341: ZEND_ARG_INFO(0, encoding)
342: ZEND_END_ARG_INFO()
343:
344: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345: ZEND_ARG_INFO(0, str)
346: ZEND_ARG_INFO(0, start)
347: ZEND_ARG_INFO(0, width)
348: ZEND_ARG_INFO(0, trimmarker)
349: ZEND_ARG_INFO(0, encoding)
350: ZEND_END_ARG_INFO()
351:
352: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353: ZEND_ARG_INFO(0, str)
354: ZEND_ARG_INFO(0, to)
355: ZEND_ARG_INFO(0, from)
356: ZEND_END_ARG_INFO()
357:
358: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359: ZEND_ARG_INFO(0, sourcestring)
360: ZEND_ARG_INFO(0, mode)
361: ZEND_ARG_INFO(0, encoding)
362: ZEND_END_ARG_INFO()
363:
364: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365: ZEND_ARG_INFO(0, sourcestring)
366: ZEND_ARG_INFO(0, encoding)
367: ZEND_END_ARG_INFO()
368:
369: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370: ZEND_ARG_INFO(0, sourcestring)
371: ZEND_ARG_INFO(0, encoding)
372: ZEND_END_ARG_INFO()
373:
374: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375: ZEND_ARG_INFO(0, str)
376: ZEND_ARG_INFO(0, encoding_list)
377: ZEND_ARG_INFO(0, strict)
378: ZEND_END_ARG_INFO()
379:
380: ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381: ZEND_END_ARG_INFO()
382:
383: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384: ZEND_ARG_INFO(0, encoding)
385: ZEND_END_ARG_INFO()
386:
387: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388: ZEND_ARG_INFO(0, str)
389: ZEND_ARG_INFO(0, charset)
390: ZEND_ARG_INFO(0, transfer)
391: ZEND_ARG_INFO(0, linefeed)
392: ZEND_ARG_INFO(0, indent)
393: ZEND_END_ARG_INFO()
394:
395: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396: ZEND_ARG_INFO(0, string)
397: ZEND_END_ARG_INFO()
398:
399: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400: ZEND_ARG_INFO(0, str)
401: ZEND_ARG_INFO(0, option)
402: ZEND_ARG_INFO(0, encoding)
403: ZEND_END_ARG_INFO()
404:
405: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
406: ZEND_ARG_INFO(0, to)
407: ZEND_ARG_INFO(0, from)
408: ZEND_ARG_INFO(1, ...)
409: ZEND_END_ARG_INFO()
410:
411: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412: ZEND_ARG_INFO(0, string)
413: ZEND_ARG_INFO(0, convmap)
414: ZEND_ARG_INFO(0, encoding)
1.1.1.2 ! misho 415: ZEND_ARG_INFO(0, is_hex)
1.1 misho 416: ZEND_END_ARG_INFO()
417:
418: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419: ZEND_ARG_INFO(0, string)
420: ZEND_ARG_INFO(0, convmap)
421: ZEND_ARG_INFO(0, encoding)
422: ZEND_END_ARG_INFO()
423:
424: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425: ZEND_ARG_INFO(0, to)
426: ZEND_ARG_INFO(0, subject)
427: ZEND_ARG_INFO(0, message)
428: ZEND_ARG_INFO(0, additional_headers)
429: ZEND_ARG_INFO(0, additional_parameters)
430: ZEND_END_ARG_INFO()
431:
432: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433: ZEND_ARG_INFO(0, type)
434: ZEND_END_ARG_INFO()
435:
436: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437: ZEND_ARG_INFO(0, var)
438: ZEND_ARG_INFO(0, encoding)
439: ZEND_END_ARG_INFO()
440:
441: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442: ZEND_ARG_INFO(0, encoding)
443: ZEND_END_ARG_INFO()
444:
445: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446: ZEND_ARG_INFO(0, pattern)
447: ZEND_ARG_INFO(0, string)
448: ZEND_ARG_INFO(1, registers)
449: ZEND_END_ARG_INFO()
450:
451: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452: ZEND_ARG_INFO(0, pattern)
453: ZEND_ARG_INFO(0, string)
454: ZEND_ARG_INFO(1, registers)
455: ZEND_END_ARG_INFO()
456:
457: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458: ZEND_ARG_INFO(0, pattern)
459: ZEND_ARG_INFO(0, replacement)
460: ZEND_ARG_INFO(0, string)
461: ZEND_ARG_INFO(0, option)
462: ZEND_END_ARG_INFO()
463:
464: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465: ZEND_ARG_INFO(0, pattern)
466: ZEND_ARG_INFO(0, replacement)
467: ZEND_ARG_INFO(0, string)
468: ZEND_END_ARG_INFO()
469:
1.1.1.2 ! misho 470: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
! 471: ZEND_ARG_INFO(0, pattern)
! 472: ZEND_ARG_INFO(0, callback)
! 473: ZEND_ARG_INFO(0, string)
! 474: ZEND_ARG_INFO(0, option)
! 475: ZEND_END_ARG_INFO()
! 476:
1.1 misho 477: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478: ZEND_ARG_INFO(0, pattern)
479: ZEND_ARG_INFO(0, string)
480: ZEND_ARG_INFO(0, limit)
481: ZEND_END_ARG_INFO()
482:
483: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484: ZEND_ARG_INFO(0, pattern)
485: ZEND_ARG_INFO(0, string)
486: ZEND_ARG_INFO(0, option)
487: ZEND_END_ARG_INFO()
488:
489: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490: ZEND_ARG_INFO(0, pattern)
491: ZEND_ARG_INFO(0, option)
492: ZEND_END_ARG_INFO()
493:
494: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495: ZEND_ARG_INFO(0, pattern)
496: ZEND_ARG_INFO(0, option)
497: ZEND_END_ARG_INFO()
498:
499: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500: ZEND_ARG_INFO(0, pattern)
501: ZEND_ARG_INFO(0, option)
502: ZEND_END_ARG_INFO()
503:
504: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505: ZEND_ARG_INFO(0, string)
506: ZEND_ARG_INFO(0, pattern)
507: ZEND_ARG_INFO(0, option)
508: ZEND_END_ARG_INFO()
509:
510: ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511: ZEND_END_ARG_INFO()
512:
513: ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514: ZEND_END_ARG_INFO()
515:
516: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517: ZEND_ARG_INFO(0, position)
518: ZEND_END_ARG_INFO()
519:
520: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521: ZEND_ARG_INFO(0, options)
522: ZEND_END_ARG_INFO()
523: /* }}} */
524:
525: /* {{{ zend_function_entry mbstring_functions[] */
526: const zend_function_entry mbstring_functions[] = {
527: PHP_FE(mb_convert_case, arginfo_mb_convert_case)
528: PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
529: PHP_FE(mb_strtolower, arginfo_mb_strtolower)
530: PHP_FE(mb_language, arginfo_mb_language)
531: PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
532: PHP_FE(mb_http_input, arginfo_mb_http_input)
533: PHP_FE(mb_http_output, arginfo_mb_http_output)
534: PHP_FE(mb_detect_order, arginfo_mb_detect_order)
535: PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
536: PHP_FE(mb_parse_str, arginfo_mb_parse_str)
537: PHP_FE(mb_output_handler, arginfo_mb_output_handler)
538: PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
539: PHP_FE(mb_strlen, arginfo_mb_strlen)
540: PHP_FE(mb_strpos, arginfo_mb_strpos)
541: PHP_FE(mb_strrpos, arginfo_mb_strrpos)
542: PHP_FE(mb_stripos, arginfo_mb_stripos)
543: PHP_FE(mb_strripos, arginfo_mb_strripos)
544: PHP_FE(mb_strstr, arginfo_mb_strstr)
545: PHP_FE(mb_strrchr, arginfo_mb_strrchr)
546: PHP_FE(mb_stristr, arginfo_mb_stristr)
547: PHP_FE(mb_strrichr, arginfo_mb_strrichr)
548: PHP_FE(mb_substr_count, arginfo_mb_substr_count)
549: PHP_FE(mb_substr, arginfo_mb_substr)
550: PHP_FE(mb_strcut, arginfo_mb_strcut)
551: PHP_FE(mb_strwidth, arginfo_mb_strwidth)
552: PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
553: PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
554: PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
555: PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
556: PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
557: PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
558: PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
559: PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
560: PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
561: PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
562: PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
563: PHP_FE(mb_send_mail, arginfo_mb_send_mail)
564: PHP_FE(mb_get_info, arginfo_mb_get_info)
565: PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
566: #if HAVE_MBREGEX
567: PHP_MBREGEX_FUNCTION_ENTRIES
568: #endif
569: PHP_FE_END
570: };
571: /* }}} */
572:
573: /* {{{ zend_module_entry mbstring_module_entry */
574: zend_module_entry mbstring_module_entry = {
1.1.1.2 ! misho 575: STANDARD_MODULE_HEADER,
1.1 misho 576: "mbstring",
577: mbstring_functions,
578: PHP_MINIT(mbstring),
579: PHP_MSHUTDOWN(mbstring),
580: PHP_RINIT(mbstring),
581: PHP_RSHUTDOWN(mbstring),
582: PHP_MINFO(mbstring),
1.1.1.2 ! misho 583: NO_VERSION_YET,
! 584: PHP_MODULE_GLOBALS(mbstring),
! 585: PHP_GINIT(mbstring),
! 586: PHP_GSHUTDOWN(mbstring),
! 587: NULL,
1.1 misho 588: STANDARD_MODULE_PROPERTIES_EX
589: };
590: /* }}} */
591:
592: /* {{{ static sapi_post_entry php_post_entries[] */
593: static sapi_post_entry php_post_entries[] = {
594: { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
595: { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
596: { NULL, 0, NULL, NULL }
597: };
598: /* }}} */
599:
600: #ifdef COMPILE_DL_MBSTRING
601: ZEND_GET_MODULE(mbstring)
602: #endif
603:
604: /* {{{ allocators */
605: static void *_php_mb_allocators_malloc(unsigned int sz)
606: {
607: return emalloc(sz);
608: }
609:
610: static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
611: {
612: return erealloc(ptr, sz);
613: }
614:
615: static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
616: {
617: return ecalloc(nelems, szelem);
618: }
619:
620: static void _php_mb_allocators_free(void *ptr)
621: {
622: efree(ptr);
623: }
624:
625: static void *_php_mb_allocators_pmalloc(unsigned int sz)
626: {
627: return pemalloc(sz, 1);
628: }
629:
630: static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
631: {
632: return perealloc(ptr, sz, 1);
633: }
634:
635: static void _php_mb_allocators_pfree(void *ptr)
636: {
637: pefree(ptr, 1);
638: }
639:
640: static mbfl_allocators _php_mb_allocators = {
641: _php_mb_allocators_malloc,
642: _php_mb_allocators_realloc,
643: _php_mb_allocators_calloc,
644: _php_mb_allocators_free,
645: _php_mb_allocators_pmalloc,
646: _php_mb_allocators_prealloc,
647: _php_mb_allocators_pfree
648: };
649: /* }}} */
650:
651: /* {{{ static sapi_post_entry mbstr_post_entries[] */
652: static sapi_post_entry mbstr_post_entries[] = {
653: { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
654: { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
655: { NULL, 0, NULL, NULL }
656: };
657: /* }}} */
658:
659: /* {{{ static int php_mb_parse_encoding_list()
660: * Return 0 if input contains any illegal encoding, otherwise 1.
661: * Even if any illegal encoding is detected the result may contain a list
662: * of parsed encodings.
663: */
664: static int
1.1.1.2 ! misho 665: php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
1.1 misho 666: {
1.1.1.2 ! misho 667: int size, bauto, ret = SUCCESS;
! 668: size_t n;
1.1 misho 669: char *p, *p1, *p2, *endp, *tmpstr;
1.1.1.2 ! misho 670: const mbfl_encoding **entry, **list;
1.1 misho 671:
672: list = NULL;
673: if (value == NULL || value_length <= 0) {
674: if (return_list) {
675: *return_list = NULL;
676: }
677: if (return_size) {
678: *return_size = 0;
679: }
1.1.1.2 ! misho 680: return FAILURE;
1.1 misho 681: } else {
682: /* copy the value string for work */
683: if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
684: tmpstr = (char *)estrndup(value+1, value_length-2);
685: value_length -= 2;
686: }
687: else
688: tmpstr = (char *)estrndup(value, value_length);
689: if (tmpstr == NULL) {
1.1.1.2 ! misho 690: return FAILURE;
1.1 misho 691: }
692: /* count the number of listed encoding names */
693: endp = tmpstr + value_length;
694: n = 1;
695: p1 = tmpstr;
696: while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
697: p1 = p2 + 1;
698: n++;
699: }
1.1.1.2 ! misho 700: size = n + MBSTRG(default_detect_order_list_size);
1.1 misho 701: /* make list */
1.1.1.2 ! misho 702: list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
1.1 misho 703: if (list != NULL) {
704: entry = list;
705: n = 0;
706: bauto = 0;
707: p1 = tmpstr;
708: do {
709: p2 = p = php_memnstr(p1, ",", 1, endp);
710: if (p == NULL) {
711: p = endp;
712: }
713: *p = '\0';
714: /* trim spaces */
715: while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
716: p1++;
717: }
718: p--;
719: while (p > p1 && (*p == ' ' || *p == '\t')) {
720: *p = '\0';
721: p--;
722: }
723: /* convert to the encoding number and check encoding */
724: if (strcasecmp(p1, "auto") == 0) {
725: if (!bauto) {
1.1.1.2 ! misho 726: const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
! 727: const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
! 728: size_t i;
1.1 misho 729: bauto = 1;
1.1.1.2 ! misho 730: for (i = 0; i < identify_list_size; i++) {
! 731: *entry++ = mbfl_no2encoding(*src++);
1.1 misho 732: n++;
733: }
734: }
735: } else {
1.1.1.2 ! misho 736: const mbfl_encoding *encoding = mbfl_name2encoding(p1);
! 737: if (encoding) {
! 738: *entry++ = encoding;
1.1 misho 739: n++;
740: } else {
741: ret = 0;
742: }
743: }
744: p1 = p2 + 1;
745: } while (n < size && p2 != NULL);
746: if (n > 0) {
747: if (return_list) {
748: *return_list = list;
749: } else {
750: pefree(list, persistent);
751: }
752: } else {
753: pefree(list, persistent);
754: if (return_list) {
755: *return_list = NULL;
756: }
757: ret = 0;
758: }
759: if (return_size) {
760: *return_size = n;
761: }
762: } else {
763: if (return_list) {
764: *return_list = NULL;
765: }
766: if (return_size) {
767: *return_size = 0;
768: }
769: ret = 0;
770: }
771: efree(tmpstr);
772: }
773:
774: return ret;
775: }
776: /* }}} */
777:
778: /* {{{ static int php_mb_parse_encoding_array()
779: * Return 0 if input contains any illegal encoding, otherwise 1.
780: * Even if any illegal encoding is detected the result may contain a list
781: * of parsed encodings.
782: */
783: static int
1.1.1.2 ! misho 784: php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
1.1 misho 785: {
786: zval **hash_entry;
787: HashTable *target_hash;
1.1.1.2 ! misho 788: int i, n, size, bauto, ret = SUCCESS;
! 789: const mbfl_encoding **list, **entry;
1.1 misho 790:
791: list = NULL;
792: if (Z_TYPE_P(array) == IS_ARRAY) {
793: target_hash = Z_ARRVAL_P(array);
794: zend_hash_internal_pointer_reset(target_hash);
795: i = zend_hash_num_elements(target_hash);
1.1.1.2 ! misho 796: size = i + MBSTRG(default_detect_order_list_size);
! 797: list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
1.1 misho 798: if (list != NULL) {
799: entry = list;
800: bauto = 0;
801: n = 0;
802: while (i > 0) {
803: if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
804: break;
805: }
806: convert_to_string_ex(hash_entry);
807: if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
808: if (!bauto) {
1.1.1.2 ! misho 809: const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
! 810: const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
! 811: size_t j;
! 812:
1.1 misho 813: bauto = 1;
1.1.1.2 ! misho 814: for (j = 0; j < identify_list_size; j++) {
! 815: *entry++ = mbfl_no2encoding(*src++);
1.1 misho 816: n++;
817: }
818: }
819: } else {
1.1.1.2 ! misho 820: const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
! 821: if (encoding) {
! 822: *entry++ = encoding;
1.1 misho 823: n++;
824: } else {
1.1.1.2 ! misho 825: ret = FAILURE;
1.1 misho 826: }
827: }
828: zend_hash_move_forward(target_hash);
829: i--;
830: }
831: if (n > 0) {
832: if (return_list) {
833: *return_list = list;
834: } else {
835: pefree(list, persistent);
836: }
837: } else {
838: pefree(list, persistent);
839: if (return_list) {
840: *return_list = NULL;
841: }
1.1.1.2 ! misho 842: ret = FAILURE;
1.1 misho 843: }
844: if (return_size) {
845: *return_size = n;
846: }
847: } else {
848: if (return_list) {
849: *return_list = NULL;
850: }
851: if (return_size) {
852: *return_size = 0;
853: }
1.1.1.2 ! misho 854: ret = FAILURE;
1.1 misho 855: }
856: }
857:
858: return ret;
859: }
860: /* }}} */
861:
1.1.1.2 ! misho 862: /* {{{ zend_multibyte interface */
! 863: static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
! 864: {
! 865: return (const zend_encoding*)mbfl_name2encoding(encoding_name);
! 866: }
! 867:
! 868: static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
! 869: {
! 870: return ((const mbfl_encoding *)encoding)->name;
! 871: }
! 872:
! 873: static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
! 874: {
! 875: const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
! 876: if (encoding->flag & MBFL_ENCTYPE_SBCS) {
! 877: return 1;
! 878: }
! 879: if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
! 880: return 1;
! 881: }
! 882: return 0;
! 883: }
! 884:
! 885: static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
! 886: {
! 887: mbfl_string string;
! 888:
! 889: if (!list) {
! 890: list = (const zend_encoding **)MBSTRG(current_detect_order_list);
! 891: list_size = MBSTRG(current_detect_order_list_size);
! 892: }
! 893:
! 894: mbfl_string_init(&string);
! 895: string.no_language = MBSTRG(language);
! 896: string.val = (unsigned char *)arg_string;
! 897: string.len = arg_length;
! 898: return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
! 899: }
! 900:
! 901: static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
! 902: {
! 903: mbfl_string string, result;
! 904: mbfl_buffer_converter *convd;
! 905: int status, loc;
! 906:
! 907: /* new encoding */
! 908: /* initialize string */
! 909: mbfl_string_init(&string);
! 910: mbfl_string_init(&result);
! 911: string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
! 912: string.no_language = MBSTRG(language);
! 913: string.val = (unsigned char*)from;
! 914: string.len = from_length;
! 915:
! 916: /* initialize converter */
! 917: convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
! 918: if (convd == NULL) {
! 919: return -1;
! 920: }
! 921: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
! 922: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
! 923:
! 924: /* do it */
! 925: status = mbfl_buffer_converter_feed2(convd, &string, &loc);
! 926: if (status) {
! 927: mbfl_buffer_converter_delete(convd);
! 928: return (size_t)-1;
! 929: }
! 930:
! 931: mbfl_buffer_converter_flush(convd);
! 932: if (!mbfl_buffer_converter_result(convd, &result)) {
! 933: mbfl_buffer_converter_delete(convd);
! 934: return (size_t)-1;
! 935: }
! 936:
! 937: *to = result.val;
! 938: *to_length = result.len;
! 939:
! 940: mbfl_buffer_converter_delete(convd);
! 941:
! 942: return loc;
! 943: }
! 944:
! 945: static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
! 946: {
! 947: return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
! 948: }
! 949:
! 950: static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
! 951: {
! 952: return (const zend_encoding *)MBSTRG(internal_encoding);
! 953: }
! 954:
! 955: static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
! 956: {
! 957: MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
! 958: return SUCCESS;
! 959: }
! 960:
/*
 * Callback table that mbstring hands to the Zend engine; it is passed to
 * zend_multibyte_set_functions() from PHP_MINIT_FUNCTION(mbstring).
 * The initializer is positional: a string label followed by the
 * php_mb_zend_* adapter functions, so the order must match the member
 * order of zend_multibyte_functions.
 */
! 961: static zend_multibyte_functions php_mb_zend_multibyte_functions = {
! 962: "mbstring",
! 963: php_mb_zend_encoding_fetcher,
! 964: php_mb_zend_encoding_name_getter,
! 965: php_mb_zend_encoding_lexer_compatibility_checker,
! 966: php_mb_zend_encoding_detector,
! 967: php_mb_zend_encoding_converter,
! 968: php_mb_zend_encoding_list_parser,
! 969: php_mb_zend_internal_encoding_getter,
! 970: php_mb_zend_internal_encoding_setter
! 971: };
! 972: /* }}} */
! 973:
/*
 * Forward declarations of the small regex abstraction used for
 * mbstring.http_output_conv_mimetypes. The definitions that follow are
 * selected at compile time: Oniguruma when HAVE_ONIG is set, otherwise PCRE
 * when HAVE_PCRE or HAVE_BUNDLED_PCRE is set.
 *   _php_mb_compile_regex: compile a pattern, NULL on failure
 *   _php_mb_match_regex:   non-zero when the compiled pattern matches str
 *   _php_mb_free_regex:    release a compiled pattern
 */
1.1 misho 974: static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
975: static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
976: static void _php_mb_free_regex(void *opaque);
977:
978: #if HAVE_ONIG
979: /* {{{ _php_mb_compile_regex */
980: static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
981: {
982: php_mb_regex_t *retval;
983: OnigErrorInfo err_info;
984: int err_code;
985:
986: if ((err_code = onig_new(&retval,
987: (const OnigUChar *)pattern,
988: (const OnigUChar *)pattern + strlen(pattern),
989: ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
990: ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
991: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
992: onig_error_code_to_str(err_str, err_code, err_info);
993: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
994: retval = NULL;
995: }
996: return retval;
997: }
998: /* }}} */
999:
1000: /* {{{ _php_mb_match_regex */
1001: static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1002: {
1003: return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1004: (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1005: (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1006: }
1007: /* }}} */
1008:
1009: /* {{{ _php_mb_free_regex */
1010: static void _php_mb_free_regex(void *opaque)
1011: {
1012: onig_free((php_mb_regex_t *)opaque);
1013: }
1014: /* }}} */
1015: #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1016: /* {{{ _php_mb_compile_regex */
1017: static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1018: {
1019: pcre *retval;
1020: const char *err_str;
1021: int err_offset;
1022:
1023: if (!(retval = pcre_compile(pattern,
1024: PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1025: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1026: }
1027: return retval;
1028: }
1029: /* }}} */
1030:
1031: /* {{{ _php_mb_match_regex */
1032: static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1033: {
1034: return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1035: 0, NULL, 0) >= 0;
1036: }
1037: /* }}} */
1038:
1039: /* {{{ _php_mb_free_regex */
/* PCRE backend: release a regex obtained from _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
1044: /* }}} */
1045: #endif
1046:
1047: /* {{{ php_mb_nls_get_default_detect_order_list */
1.1.1.2 ! misho 1048: static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1.1 misho 1049: {
1050: size_t i;
1051:
1052: *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1053: *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1054:
1055: for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1056: if (php_mb_default_identify_list[i].lang == lang) {
1057: *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1058: *plist_size = php_mb_default_identify_list[i].list_size;
1059: return 1;
1060: }
1061: }
1062: return 0;
1063: }
1064: /* }}} */
1065:
1.1.1.2 ! misho 1066: static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
! 1067: {
! 1068: char *result = emalloc(len + 2);
! 1069: char *resp = result;
! 1070: int i;
! 1071:
! 1072: for (i = 0; i < len && start[i] != quote; ++i) {
! 1073: if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
! 1074: *resp++ = start[++i];
! 1075: } else {
! 1076: size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
! 1077:
! 1078: while (j-- > 0 && i < len) {
! 1079: *resp++ = start[i++];
! 1080: }
! 1081: --i;
! 1082: }
! 1083: }
! 1084:
! 1085: *resp = '\0';
! 1086: return result;
! 1087: }
! 1088:
! 1089: static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
! 1090: {
! 1091: char *pos = *line, quote;
! 1092: char *res;
! 1093:
! 1094: while (*pos && *pos != stop) {
! 1095: if ((quote = *pos) == '"' || quote == '\'') {
! 1096: ++pos;
! 1097: while (*pos && *pos != quote) {
! 1098: if (*pos == '\\' && pos[1] && pos[1] == quote) {
! 1099: pos += 2;
! 1100: } else {
! 1101: ++pos;
! 1102: }
! 1103: }
! 1104: if (*pos) {
! 1105: ++pos;
! 1106: }
! 1107: } else {
! 1108: pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
! 1109:
! 1110: }
! 1111: }
! 1112: if (*pos == '\0') {
! 1113: res = estrdup(*line);
! 1114: *line += strlen(*line);
! 1115: return res;
! 1116: }
! 1117:
! 1118: res = estrndup(*line, pos - *line);
! 1119:
! 1120: while (*pos == stop) {
! 1121: pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
! 1122: }
! 1123:
! 1124: *line = pos;
! 1125: return res;
! 1126: }
! 1127: /* }}} */
! 1128:
! 1129: static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
! 1130: {
! 1131: while (*str && isspace(*(unsigned char *)str)) {
! 1132: ++str;
! 1133: }
! 1134:
! 1135: if (!*str) {
! 1136: return estrdup("");
! 1137: }
! 1138:
! 1139: if (*str == '"' || *str == '\'') {
! 1140: char quote = *str;
! 1141:
! 1142: str++;
! 1143: return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
! 1144: } else {
! 1145: char *strend = str;
! 1146:
! 1147: while (*strend && !isspace(*(unsigned char *)strend)) {
! 1148: ++strend;
! 1149: }
! 1150: return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC);
! 1151: }
! 1152: }
! 1153: /* }}} */
! 1154:
! 1155: static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
! 1156: {
! 1157: char *s, *s2;
! 1158: const size_t filename_len = strlen(filename);
! 1159:
! 1160: /* The \ check should technically be needed for win32 systems only where
! 1161: * it is a valid path separator. However, IE in all it's wisdom always sends
! 1162: * the full path of the file on the user's filesystem, which means that unless
! 1163: * the user does basename() they get a bogus file name. Until IE's user base drops
! 1164: * to nill or problem is fixed this code must remain enabled for all systems. */
! 1165: s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
! 1166: s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
! 1167:
! 1168: if (s && s2) {
! 1169: if (s > s2) {
! 1170: return ++s;
! 1171: } else {
! 1172: return ++s2;
! 1173: }
! 1174: } else if (s) {
! 1175: return ++s;
! 1176: } else if (s2) {
! 1177: return ++s2;
! 1178: } else {
! 1179: return filename;
! 1180: }
! 1181: }
! 1182: /* }}} */
! 1183:
1.1 misho 1184: /* {{{ php.ini directive handler */
1185: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
1186: static PHP_INI_MH(OnUpdate_mbstring_language)
1187: {
1188: enum mbfl_no_language no_language;
1189:
1190: no_language = mbfl_name2no_language(new_value);
1191: if (no_language == mbfl_no_language_invalid) {
1192: MBSTRG(language) = mbfl_no_language_neutral;
1193: return FAILURE;
1194: }
1195: MBSTRG(language) = no_language;
1196: php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1197: return SUCCESS;
1198: }
1199: /* }}} */
1200:
1201: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
1202: static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1203: {
1.1.1.2 ! misho 1204: const mbfl_encoding **list;
! 1205: size_t size;
1.1 misho 1206:
1.1.1.2 ! misho 1207: if (!new_value) {
1.1 misho 1208: if (MBSTRG(detect_order_list)) {
1.1.1.2 ! misho 1209: pefree(MBSTRG(detect_order_list), 1);
1.1 misho 1210: }
1.1.1.2 ! misho 1211: MBSTRG(detect_order_list) = NULL;
! 1212: MBSTRG(detect_order_list_size) = 0;
! 1213: return SUCCESS;
! 1214: }
! 1215:
! 1216: if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1.1 misho 1217: return FAILURE;
1218: }
1219:
1.1.1.2 ! misho 1220: if (MBSTRG(detect_order_list)) {
! 1221: pefree(MBSTRG(detect_order_list), 1);
! 1222: }
! 1223: MBSTRG(detect_order_list) = list;
! 1224: MBSTRG(detect_order_list_size) = size;
1.1 misho 1225: return SUCCESS;
1226: }
1227: /* }}} */
1228:
1229: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1230: static PHP_INI_MH(OnUpdate_mbstring_http_input)
1231: {
1.1.1.2 ! misho 1232: const mbfl_encoding **list;
! 1233: size_t size;
1.1 misho 1234:
1.1.1.2 ! misho 1235: if (!new_value) {
1.1 misho 1236: if (MBSTRG(http_input_list)) {
1.1.1.2 ! misho 1237: pefree(MBSTRG(http_input_list), 1);
1.1 misho 1238: }
1.1.1.2 ! misho 1239: MBSTRG(http_input_list) = NULL;
1.1 misho 1240: MBSTRG(http_input_list_size) = 0;
1.1.1.2 ! misho 1241: return SUCCESS;
! 1242: }
! 1243:
! 1244: if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1.1 misho 1245: return FAILURE;
1246: }
1247:
1.1.1.2 ! misho 1248: if (MBSTRG(http_input_list)) {
! 1249: pefree(MBSTRG(http_input_list), 1);
! 1250: }
! 1251: MBSTRG(http_input_list) = list;
! 1252: MBSTRG(http_input_list_size) = size;
! 1253:
1.1 misho 1254: return SUCCESS;
1255: }
1256: /* }}} */
1257:
1258: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1259: static PHP_INI_MH(OnUpdate_mbstring_http_output)
1260: {
1.1.1.2 ! misho 1261: const mbfl_encoding *encoding;
1.1 misho 1262:
1.1.1.2 ! misho 1263: if (new_value == NULL || new_value_length == 0) {
! 1264: MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
! 1265: MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
! 1266: return SUCCESS;
! 1267: }
! 1268:
! 1269: encoding = mbfl_name2encoding(new_value);
! 1270: if (!encoding) {
! 1271: MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
! 1272: MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
! 1273: return FAILURE;
1.1 misho 1274: }
1275:
1.1.1.2 ! misho 1276: MBSTRG(http_output_encoding) = encoding;
! 1277: MBSTRG(current_http_output_encoding) = encoding;
1.1 misho 1278: return SUCCESS;
1279: }
1280: /* }}} */
1281:
1282: /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
1283: int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1284: {
1.1.1.2 ! misho 1285: const mbfl_encoding *encoding;
! 1286:
! 1287: if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1.1 misho 1288: switch (MBSTRG(language)) {
1289: case mbfl_no_language_uni:
1.1.1.2 ! misho 1290: encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1.1 misho 1291: break;
1292: case mbfl_no_language_japanese:
1.1.1.2 ! misho 1293: encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp);
1.1 misho 1294: break;
1295: case mbfl_no_language_korean:
1.1.1.2 ! misho 1296: encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr);
1.1 misho 1297: break;
1298: case mbfl_no_language_simplified_chinese:
1.1.1.2 ! misho 1299: encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn);
1.1 misho 1300: break;
1301: case mbfl_no_language_traditional_chinese:
1.1.1.2 ! misho 1302: encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw);
1.1 misho 1303: break;
1304: case mbfl_no_language_russian:
1.1.1.2 ! misho 1305: encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r);
1.1 misho 1306: break;
1307: case mbfl_no_language_german:
1.1.1.2 ! misho 1308: encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15);
1.1 misho 1309: break;
1310: case mbfl_no_language_armenian:
1.1.1.2 ! misho 1311: encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8);
1.1 misho 1312: break;
1313: case mbfl_no_language_turkish:
1.1.1.2 ! misho 1314: encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9);
1.1 misho 1315: break;
1316: default:
1.1.1.2 ! misho 1317: encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1);
1.1 misho 1318: break;
1319: }
1320: }
1.1.1.2 ! misho 1321: MBSTRG(internal_encoding) = encoding;
! 1322: MBSTRG(current_internal_encoding) = encoding;
1.1 misho 1323: #if HAVE_MBREGEX
1324: {
1325: const char *enc_name = new_value;
1326: if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1327: /* falls back to EUC-JP if an unknown encoding name is given */
1328: enc_name = "EUC-JP";
1329: php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1330: }
1331: php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1332: }
1333: #endif
1334: return SUCCESS;
1335: }
1336: /* }}} */
1337:
1338: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1339: static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1340: {
1.1.1.2 ! misho 1341: if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
! 1342: return FAILURE;
! 1343: }
1.1 misho 1344: if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1345: || stage == PHP_INI_STAGE_RUNTIME) {
1346: return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1347: } else {
1348: /* the corresponding mbstring globals needs to be set according to the
1349: * ini value in the later stage because it never falls back to the
1350: * default value if 1. no value for mbstring.internal_encoding is given,
1351: * 2. mbstring.language directive is processed in per-dir or runtime
1352: * context and 3. call to the handler for mbstring.language is done
1353: * after mbstring.internal_encoding is handled. */
1354: return SUCCESS;
1355: }
1356: }
1357: /* }}} */
1358:
1359: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1360: static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1361: {
1362: int c;
1363: char *endptr = NULL;
1364:
1365: if (new_value != NULL) {
1366: if (strcasecmp("none", new_value) == 0) {
1367: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1368: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1369: } else if (strcasecmp("long", new_value) == 0) {
1370: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1371: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1372: } else if (strcasecmp("entity", new_value) == 0) {
1373: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1374: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1375: } else {
1376: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1377: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1378: if (new_value_length >0) {
1379: c = strtol(new_value, &endptr, 0);
1380: if (*endptr == '\0') {
1381: MBSTRG(filter_illegal_substchar) = c;
1382: MBSTRG(current_filter_illegal_substchar) = c;
1383: }
1384: }
1385: }
1386: } else {
1387: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1388: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1389: MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1390: MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1391: }
1392:
1393: return SUCCESS;
1394: }
1395: /* }}} */
1396:
1397: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1398: static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1399: {
1400: if (new_value == NULL) {
1.1.1.2 ! misho 1401: return FAILURE;
1.1 misho 1402: }
1403:
1404: OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1405:
1406: if (MBSTRG(encoding_translation)) {
1407: sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1408: sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1409: } else {
1410: sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1411: sapi_register_post_entries(php_post_entries TSRMLS_CC);
1412: }
1413:
1414: return SUCCESS;
1415: }
1416: /* }}} */
1417:
1418: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
1419: static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1420: {
1421: zval tmp;
1422: void *re = NULL;
1423:
1424: if (!new_value) {
1425: new_value = entry->orig_value;
1426: new_value_length = entry->orig_value_length;
1427: }
1428: php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1429:
1430: if (Z_STRLEN(tmp) > 0) {
1431: if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1432: zval_dtor(&tmp);
1433: return FAILURE;
1434: }
1435: }
1436:
1437: if (MBSTRG(http_output_conv_mimetypes)) {
1438: _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1439: }
1440:
1441: MBSTRG(http_output_conv_mimetypes) = re;
1442:
1443: zval_dtor(&tmp);
1444: return SUCCESS;
1445: }
1446: /* }}} */
1447: /* }}} */
1448:
1449: /* {{{ php.ini directive registration */
/*
 * php.ini directives of the mbstring extension.
 * Each entry gives: directive name, default value, the contexts in which it
 * may be changed, and its update handler. STD_* entries additionally name the
 * zend_mbstring_globals member that the standard handler writes.
 * mbstring.func_overload is PHP_INI_SYSTEM only; mbstring.encoding_translation
 * is PHP_INI_SYSTEM | PHP_INI_PERDIR; the remaining directives are
 * PHP_INI_ALL.
 */
1450: PHP_INI_BEGIN()
1451: PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1452: PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1453: PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
1454: PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
1.1.1.2 ! misho 1455: STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1.1 misho 1456: PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1457: STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1458: PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1459:
1460: STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1461: PHP_INI_SYSTEM | PHP_INI_PERDIR,
1462: OnUpdate_mbstring_encoding_translation,
1463: encoding_translation, zend_mbstring_globals, mbstring_globals)
1464: PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1465: "^(text/|application/xhtml\\+xml)",
1466: PHP_INI_ALL,
1467: OnUpdate_mbstring_http_output_conv_mimetypes)
1468:
/* NOTE(review): declared as a boolean directive but stored via OnUpdateLong;
 * presumably strict_detection is a long in zend_mbstring_globals -- confirm. */
1469: STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1470: PHP_INI_ALL,
1471: OnUpdateLong,
1472: strict_detection, zend_mbstring_globals, mbstring_globals)
1473: PHP_INI_END()
1474: /* }}} */
1475:
1476: /* {{{ module global initialize handler */
1477: static PHP_GINIT_FUNCTION(mbstring)
1478: {
1479: mbstring_globals->language = mbfl_no_language_uni;
1.1.1.2 ! misho 1480: mbstring_globals->internal_encoding = NULL;
1.1 misho 1481: mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1.1.1.2 ! misho 1482: mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
! 1483: mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
! 1484: mbstring_globals->http_input_identify = NULL;
! 1485: mbstring_globals->http_input_identify_get = NULL;
! 1486: mbstring_globals->http_input_identify_post = NULL;
! 1487: mbstring_globals->http_input_identify_cookie = NULL;
! 1488: mbstring_globals->http_input_identify_string = NULL;
1.1 misho 1489: mbstring_globals->http_input_list = NULL;
1490: mbstring_globals->http_input_list_size = 0;
1491: mbstring_globals->detect_order_list = NULL;
1492: mbstring_globals->detect_order_list_size = 0;
1493: mbstring_globals->current_detect_order_list = NULL;
1494: mbstring_globals->current_detect_order_list_size = 0;
1495: mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1496: mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1497: mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1498: mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1499: mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1500: mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1501: mbstring_globals->illegalchars = 0;
1502: mbstring_globals->func_overload = 0;
1503: mbstring_globals->encoding_translation = 0;
1504: mbstring_globals->strict_detection = 0;
1505: mbstring_globals->outconv = NULL;
1506: mbstring_globals->http_output_conv_mimetypes = NULL;
1507: #if HAVE_MBREGEX
1508: mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1509: #endif
1510: }
1511: /* }}} */
1512:
1513: /* {{{ PHP_GSHUTDOWN_FUNCTION */
1514: static PHP_GSHUTDOWN_FUNCTION(mbstring)
1515: {
1516: if (mbstring_globals->http_input_list) {
1517: free(mbstring_globals->http_input_list);
1518: }
1519: if (mbstring_globals->detect_order_list) {
1520: free(mbstring_globals->detect_order_list);
1521: }
1522: if (mbstring_globals->http_output_conv_mimetypes) {
1523: _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1524: }
1525: #if HAVE_MBREGEX
1526: php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1527: #endif
1528: }
1529: /* }}} */
1530:
1531: /* {{{ PHP_MINIT_FUNCTION(mbstring) */
1532: PHP_MINIT_FUNCTION(mbstring)
1533: {
1534: __mbfl_allocators = &_php_mb_allocators;
1535:
1536: REGISTER_INI_ENTRIES();
1537:
1538: /* This is a global handler. Should not be set in a per-request handler. */
1.1.1.2 ! misho 1539: sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);
1.1 misho 1540:
1541: /* Post handlers are stored in the thread-local context. */
1542: if (MBSTRG(encoding_translation)) {
1543: sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1544: }
1545:
1546: REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1547: REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1548: REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1549:
1550: REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1551: REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1552: REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1553:
1554: #if HAVE_MBREGEX
1555: PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1556: #endif
1.1.1.2 ! misho 1557:
! 1558: if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
! 1559: return FAILURE;
! 1560: }
! 1561:
! 1562: php_rfc1867_set_multibyte_callbacks(
! 1563: php_mb_encoding_translation,
! 1564: php_mb_gpc_get_detect_order,
! 1565: php_mb_gpc_set_input_encoding,
! 1566: php_mb_rfc1867_getword,
! 1567: php_mb_rfc1867_getword_conf,
! 1568: php_mb_rfc1867_basename);
! 1569:
1.1 misho 1570: return SUCCESS;
1571: }
1572: /* }}} */
1573:
1574: /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
1575: PHP_MSHUTDOWN_FUNCTION(mbstring)
1576: {
1577: UNREGISTER_INI_ENTRIES();
1.1.1.2 ! misho 1578:
1.1 misho 1579: #if HAVE_MBREGEX
1580: PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1581: #endif
1582:
1583: return SUCCESS;
1584: }
1585: /* }}} */
1586:
1587: /* {{{ PHP_RINIT_FUNCTION(mbstring) */
1588: PHP_RINIT_FUNCTION(mbstring)
1589: {
1590: zend_function *func, *orig;
1591: const struct mb_overload_def *p;
1592:
1593: MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1594: MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1595: MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1596: MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1597:
1598: MBSTRG(illegalchars) = 0;
1599:
1.1.1.2 ! misho 1600: php_mb_populate_current_detect_order_list(TSRMLS_C);
1.1 misho 1601:
1602: /* override original function. */
1603: if (MBSTRG(func_overload)){
1604: p = &(mb_ovld[0]);
1605:
1606: while (p->type > 0) {
1607: if ((MBSTRG(func_overload) & p->type) == p->type &&
1608: zend_hash_find(EG(function_table), p->save_func,
1609: strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1610:
1611: zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1612:
1613: if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1614: php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1615: return FAILURE;
1616: } else {
1617: zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1618:
1619: if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1620: NULL) == FAILURE) {
1621: php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1622: return FAILURE;
1623: }
1624: }
1625: }
1626: p++;
1627: }
1628: }
1629: #if HAVE_MBREGEX
1630: PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1631: #endif
1.1.1.2 ! misho 1632: zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1.1 misho 1633:
1634: return SUCCESS;
1635: }
1636: /* }}} */
1637:
1638: /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1639: PHP_RSHUTDOWN_FUNCTION(mbstring)
1640: {
1641: const struct mb_overload_def *p;
1642: zend_function *orig;
1643:
1644: if (MBSTRG(current_detect_order_list) != NULL) {
1645: efree(MBSTRG(current_detect_order_list));
1646: MBSTRG(current_detect_order_list) = NULL;
1647: MBSTRG(current_detect_order_list_size) = 0;
1648: }
1649: if (MBSTRG(outconv) != NULL) {
1650: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1651: mbfl_buffer_converter_delete(MBSTRG(outconv));
1652: MBSTRG(outconv) = NULL;
1653: }
1654:
1655: /* clear http input identification. */
1.1.1.2 ! misho 1656: MBSTRG(http_input_identify) = NULL;
! 1657: MBSTRG(http_input_identify_post) = NULL;
! 1658: MBSTRG(http_input_identify_get) = NULL;
! 1659: MBSTRG(http_input_identify_cookie) = NULL;
! 1660: MBSTRG(http_input_identify_string) = NULL;
1.1 misho 1661:
1662: /* clear overloaded function. */
1663: if (MBSTRG(func_overload)){
1664: p = &(mb_ovld[0]);
1665: while (p->type > 0) {
1666: if ((MBSTRG(func_overload) & p->type) == p->type &&
1667: zend_hash_find(EG(function_table), p->save_func,
1668: strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1669:
1670: zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1671: zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1672: }
1673: p++;
1674: }
1675: }
1676:
1677: #if HAVE_MBREGEX
1678: PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1679: #endif
1680:
1681: return SUCCESS;
1682: }
1683: /* }}} */
1684:
1685: /* {{{ PHP_MINFO_FUNCTION(mbstring) */
1686: PHP_MINFO_FUNCTION(mbstring)
1687: {
1688: php_info_print_table_start();
1689: php_info_print_table_row(2, "Multibyte Support", "enabled");
1690: php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1.1.1.2 ! misho 1691: php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
! 1692: {
! 1693: char tmp[256];
! 1694: snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
! 1695: php_info_print_table_row(2, "libmbfl version", tmp);
! 1696: }
1.1 misho 1697: php_info_print_table_end();
1698:
1699: php_info_print_table_start();
1700: php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1701: php_info_print_table_end();
1702:
1703: #if HAVE_MBREGEX
1704: PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1705: #endif
1706:
1707: DISPLAY_INI_ENTRIES();
1708: }
1709: /* }}} */
1710:
1711: /* {{{ proto string mb_language([string language])
1712: Sets the current language or returns the current language as a string */
1713: PHP_FUNCTION(mb_language)
1714: {
1715: char *name = NULL;
1716: int name_len = 0;
1717:
1718: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1719: return;
1720: }
1721: if (name == NULL) {
1722: RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1723: } else {
1724: if (FAILURE == zend_alter_ini_entry(
1725: "mbstring.language", sizeof("mbstring.language"),
1726: name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1727: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1728: RETVAL_FALSE;
1729: } else {
1730: RETVAL_TRUE;
1731: }
1732: }
1733: }
1734: /* }}} */
1735:
1736: /* {{{ proto string mb_internal_encoding([string encoding])
1737: Sets the current internal encoding or returns the current internal encoding as a string */
1738: PHP_FUNCTION(mb_internal_encoding)
1739: {
1.1.1.2 ! misho 1740: const char *name = NULL;
1.1 misho 1741: int name_len;
1.1.1.2 ! misho 1742: const mbfl_encoding *encoding;
1.1 misho 1743:
1744: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1745: RETURN_FALSE;
1746: }
1747: if (name == NULL) {
1.1.1.2 ! misho 1748: name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1.1 misho 1749: if (name != NULL) {
1750: RETURN_STRING(name, 1);
1751: } else {
1752: RETURN_FALSE;
1753: }
1754: } else {
1.1.1.2 ! misho 1755: encoding = mbfl_name2encoding(name);
! 1756: if (!encoding) {
1.1 misho 1757: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1758: RETURN_FALSE;
1759: } else {
1.1.1.2 ! misho 1760: MBSTRG(current_internal_encoding) = encoding;
1.1 misho 1761: RETURN_TRUE;
1762: }
1763: }
1764: }
1765: /* }}} */
1766:
1767: /* {{{ proto mixed mb_http_input([string type])
1768: Returns the input encoding */
1769: PHP_FUNCTION(mb_http_input)
1770: {
1771: char *typ = NULL;
1772: int typ_len;
1.1.1.2 ! misho 1773: int retname;
! 1774: char *list, *temp;
! 1775: const mbfl_encoding *result = NULL;
1.1 misho 1776:
1777: retname = 1;
1778: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1779: RETURN_FALSE;
1780: }
1781: if (typ == NULL) {
1782: result = MBSTRG(http_input_identify);
1783: } else {
1784: switch (*typ) {
1785: case 'G':
1786: case 'g':
1787: result = MBSTRG(http_input_identify_get);
1788: break;
1789: case 'P':
1790: case 'p':
1791: result = MBSTRG(http_input_identify_post);
1792: break;
1793: case 'C':
1794: case 'c':
1795: result = MBSTRG(http_input_identify_cookie);
1796: break;
1797: case 'S':
1798: case 's':
1799: result = MBSTRG(http_input_identify_string);
1800: break;
1801: case 'I':
1802: case 'i':
1.1.1.2 ! misho 1803: {
! 1804: const mbfl_encoding **entry = MBSTRG(http_input_list);
! 1805: const size_t n = MBSTRG(http_input_list_size);
! 1806: size_t i;
! 1807: array_init(return_value);
! 1808: for (i = 0; i < n; i++) {
! 1809: add_next_index_string(return_value, (*entry)->name, 1);
! 1810: entry++;
1.1 misho 1811: }
1.1.1.2 ! misho 1812: retname = 0;
1.1 misho 1813: }
1814: break;
1815: case 'L':
1816: case 'l':
1.1.1.2 ! misho 1817: {
! 1818: const mbfl_encoding **entry = MBSTRG(http_input_list);
! 1819: const size_t n = MBSTRG(http_input_list_size);
! 1820: size_t i;
! 1821: list = NULL;
! 1822: for (i = 0; i < n; i++) {
1.1 misho 1823: if (list) {
1824: temp = list;
1.1.1.2 ! misho 1825: spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1.1 misho 1826: efree(temp);
1827: if (!list) {
1828: break;
1829: }
1830: } else {
1.1.1.2 ! misho 1831: list = estrdup((*entry)->name);
1.1 misho 1832: }
1.1.1.2 ! misho 1833: entry++;
1.1 misho 1834: }
1835: }
1836: if (!list) {
1837: RETURN_FALSE;
1838: }
1839: RETVAL_STRING(list, 0);
1840: retname = 0;
1841: break;
1842: default:
1843: result = MBSTRG(http_input_identify);
1844: break;
1845: }
1846: }
1847:
1848: if (retname) {
1.1.1.2 ! misho 1849: if (result) {
! 1850: RETVAL_STRING(result->name, 1);
1.1 misho 1851: } else {
1852: RETVAL_FALSE;
1853: }
1854: }
1855: }
1856: /* }}} */
1857:
1858: /* {{{ proto string mb_http_output([string encoding])
1859: Sets the current output_encoding or returns the current output_encoding as a string */
1860: PHP_FUNCTION(mb_http_output)
1861: {
1.1.1.2 ! misho 1862: const char *name = NULL;
1.1 misho 1863: int name_len;
1.1.1.2 ! misho 1864: const mbfl_encoding *encoding;
1.1 misho 1865:
1866: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1867: RETURN_FALSE;
1868: }
1869:
1870: if (name == NULL) {
1.1.1.2 ! misho 1871: name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1.1 misho 1872: if (name != NULL) {
1873: RETURN_STRING(name, 1);
1874: } else {
1875: RETURN_FALSE;
1876: }
1877: } else {
1.1.1.2 ! misho 1878: encoding = mbfl_name2encoding(name);
! 1879: if (!encoding) {
1.1 misho 1880: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1881: RETURN_FALSE;
1882: } else {
1.1.1.2 ! misho 1883: MBSTRG(current_http_output_encoding) = encoding;
1.1 misho 1884: RETURN_TRUE;
1885: }
1886: }
1887: }
1888: /* }}} */
1889:
1890: /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1891: Sets the current detect_order or Return the current detect_order as a array */
1892: PHP_FUNCTION(mb_detect_order)
1893: {
1894: zval **arg1 = NULL;
1895:
1896: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1897: return;
1898: }
1899:
1900: if (!arg1) {
1.1.1.2 ! misho 1901: size_t i;
! 1902: size_t n = MBSTRG(current_detect_order_list_size);
! 1903: const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1.1 misho 1904: array_init(return_value);
1.1.1.2 ! misho 1905: for (i = 0; i < n; i++) {
! 1906: add_next_index_string(return_value, (*entry)->name, 1);
1.1 misho 1907: entry++;
1908: }
1909: } else {
1.1.1.2 ! misho 1910: const mbfl_encoding **list = NULL;
! 1911: size_t size = 0;
1.1 misho 1912: switch (Z_TYPE_PP(arg1)) {
1913: case IS_ARRAY:
1.1.1.2 ! misho 1914: if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1.1 misho 1915: if (list) {
1916: efree(list);
1917: }
1918: RETURN_FALSE;
1919: }
1920: break;
1921: default:
1922: convert_to_string_ex(arg1);
1.1.1.2 ! misho 1923: if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1.1 misho 1924: if (list) {
1925: efree(list);
1926: }
1927: RETURN_FALSE;
1928: }
1929: break;
1930: }
1931:
1932: if (list == NULL) {
1933: RETURN_FALSE;
1934: }
1935:
1936: if (MBSTRG(current_detect_order_list)) {
1937: efree(MBSTRG(current_detect_order_list));
1938: }
1939: MBSTRG(current_detect_order_list) = list;
1940: MBSTRG(current_detect_order_list_size) = size;
1941: RETURN_TRUE;
1942: }
1943: }
1944: /* }}} */
1945:
1946: /* {{{ proto mixed mb_substitute_character([mixed substchar])
1947: Sets the current substitute_character or returns the current substitute_character */
1948: PHP_FUNCTION(mb_substitute_character)
1949: {
1950: zval **arg1 = NULL;
1951:
1952: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1953: return;
1954: }
1955:
1956: if (!arg1) {
1957: if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1958: RETURN_STRING("none", 1);
1959: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1960: RETURN_STRING("long", 1);
1961: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1962: RETURN_STRING("entity", 1);
1963: } else {
1964: RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1965: }
1966: } else {
1967: RETVAL_TRUE;
1968:
1969: switch (Z_TYPE_PP(arg1)) {
1970: case IS_STRING:
1971: if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1972: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1973: } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1974: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1975: } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1976: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1977: } else {
1978: convert_to_long_ex(arg1);
1979:
1980: if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1981: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1982: MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1983: } else {
1984: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1985: RETURN_FALSE;
1986: }
1987: }
1988: break;
1989: default:
1990: convert_to_long_ex(arg1);
1991: if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1992: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1993: MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1994: } else {
1995: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1996: RETURN_FALSE;
1997: }
1998: break;
1999: }
2000: }
2001: }
2002: /* }}} */
2003:
2004: /* {{{ proto string mb_preferred_mime_name(string encoding)
2005: Return the preferred MIME name (charset) as a string */
2006: PHP_FUNCTION(mb_preferred_mime_name)
2007: {
2008: enum mbfl_no_encoding no_encoding;
2009: char *name = NULL;
2010: int name_len;
2011:
2012: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2013: return;
2014: } else {
2015: no_encoding = mbfl_name2no_encoding(name);
2016: if (no_encoding == mbfl_no_encoding_invalid) {
2017: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2018: RETVAL_FALSE;
2019: } else {
2020: const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2021: if (preferred_name == NULL || *preferred_name == '\0') {
2022: php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2023: RETVAL_FALSE;
2024: } else {
2025: RETVAL_STRING((char *)preferred_name, 1);
2026: }
2027: }
2028: }
2029: }
2030: /* }}} */
2031:
/* Byte-range predicates; each evaluates to 1 when c lies in one of the two
 * listed ranges and to 0 otherwise.  Judging by the names these are the
 * Shift_JIS first-byte and second-byte ranges (NOTE(review): confirm).
 * The argument is evaluated more than once. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2034:
2035: /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2036: Parses GET/POST/COOKIE data and sets global variables */
2037: PHP_FUNCTION(mb_parse_str)
2038: {
2039: zval *track_vars_array = NULL;
2040: char *encstr = NULL;
2041: int encstr_len;
2042: php_mb_encoding_handler_info_t info;
1.1.1.2 ! misho 2043: const mbfl_encoding *detected;
1.1 misho 2044:
2045: track_vars_array = NULL;
2046: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2047: return;
2048: }
2049:
2050: if (track_vars_array != NULL) {
1.1.1.2 ! misho 2051: /* Clear out the array */
1.1 misho 2052: zval_dtor(track_vars_array);
2053: array_init(track_vars_array);
2054: }
2055:
2056: encstr = estrndup(encstr, encstr_len);
2057:
2058: info.data_type = PARSE_STRING;
2059: info.separator = PG(arg_separator).input;
2060: info.report_errors = 1;
2061: info.to_encoding = MBSTRG(current_internal_encoding);
2062: info.to_language = MBSTRG(language);
2063: info.from_encodings = MBSTRG(http_input_list);
2064: info.num_from_encodings = MBSTRG(http_input_list_size);
2065: info.from_language = MBSTRG(language);
2066:
1.1.1.2 ! misho 2067: if (track_vars_array != NULL) {
! 2068: detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
! 2069: } else {
! 2070: zval tmp;
! 2071: if (!EG(active_symbol_table)) {
! 2072: zend_rebuild_symbol_table(TSRMLS_C);
! 2073: }
! 2074: Z_ARRVAL(tmp) = EG(active_symbol_table);
! 2075: detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
! 2076: }
1.1 misho 2077:
2078: MBSTRG(http_input_identify) = detected;
2079:
1.1.1.2 ! misho 2080: RETVAL_BOOL(detected);
1.1 misho 2081:
2082: if (encstr != NULL) efree(encstr);
2083: }
2084: /* }}} */
2085:
2086: /* {{{ proto string mb_output_handler(string contents, int status)
2087: Returns string in output buffer converted to the http_output encoding */
2088: PHP_FUNCTION(mb_output_handler)
2089: {
2090: char *arg_string;
2091: int arg_string_len;
2092: long arg_status;
2093: mbfl_string string, result;
2094: const char *charset;
2095: char *p;
1.1.1.2 ! misho 2096: const mbfl_encoding *encoding;
1.1 misho 2097: int last_feed, len;
2098: unsigned char send_text_mimetype = 0;
2099: char *s, *mimetype = NULL;
2100:
2101: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2102: return;
2103: }
2104:
2105: encoding = MBSTRG(current_http_output_encoding);
2106:
2107: /* start phase only */
2108: if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2109: /* delete the converter just in case. */
2110: if (MBSTRG(outconv)) {
2111: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2112: mbfl_buffer_converter_delete(MBSTRG(outconv));
2113: MBSTRG(outconv) = NULL;
2114: }
1.1.1.2 ! misho 2115: if (encoding == &mbfl_encoding_pass) {
1.1 misho 2116: RETURN_STRINGL(arg_string, arg_string_len, 1);
2117: }
2118:
2119: /* analyze mime type */
2120: if (SG(sapi_headers).mimetype &&
2121: _php_mb_match_regex(
2122: MBSTRG(http_output_conv_mimetypes),
2123: SG(sapi_headers).mimetype,
2124: strlen(SG(sapi_headers).mimetype))) {
2125: if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2126: mimetype = estrdup(SG(sapi_headers).mimetype);
2127: } else {
2128: mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2129: }
2130: send_text_mimetype = 1;
2131: } else if (SG(sapi_headers).send_default_content_type) {
2132: mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2133: }
2134:
2135: /* if content-type is not yet set, set it and activate the converter */
2136: if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1.1.1.2 ! misho 2137: charset = encoding->mime_name;
1.1 misho 2138: if (charset) {
2139: len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2140: if (sapi_add_header(p, len, 0) != FAILURE) {
2141: SG(sapi_headers).send_default_content_type = 0;
2142: }
2143: }
2144: /* activate the converter */
1.1.1.2 ! misho 2145: MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
1.1 misho 2146: if (send_text_mimetype){
2147: efree(mimetype);
2148: }
2149: }
2150: }
2151:
2152: /* just return if the converter is not activated. */
2153: if (MBSTRG(outconv) == NULL) {
2154: RETURN_STRINGL(arg_string, arg_string_len, 1);
2155: }
2156:
2157: /* flag */
2158: last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2159: /* mode */
2160: mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2161: mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2162:
2163: /* feed the string */
2164: mbfl_string_init(&string);
2165: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 2166: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2167: string.val = (unsigned char *)arg_string;
2168: string.len = arg_string_len;
2169: mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2170: if (last_feed) {
2171: mbfl_buffer_converter_flush(MBSTRG(outconv));
2172: }
2173: /* get the converter output, and return it */
2174: mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2175: RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
2176:
2177: /* delete the converter if it is the last feed. */
2178: if (last_feed) {
2179: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2180: mbfl_buffer_converter_delete(MBSTRG(outconv));
2181: MBSTRG(outconv) = NULL;
2182: }
2183: }
2184: /* }}} */
2185:
2186: /* {{{ proto int mb_strlen(string str [, string encoding])
2187: Get character numbers of a string */
2188: PHP_FUNCTION(mb_strlen)
2189: {
2190: int n;
2191: mbfl_string string;
2192: char *enc_name = NULL;
2193: int enc_name_len;
2194:
2195: mbfl_string_init(&string);
2196:
2197: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2198: RETURN_FALSE;
2199: }
2200:
2201: string.no_language = MBSTRG(language);
2202: if (enc_name == NULL) {
1.1.1.2 ! misho 2203: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2204: } else {
2205: string.no_encoding = mbfl_name2no_encoding(enc_name);
2206: if (string.no_encoding == mbfl_no_encoding_invalid) {
2207: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2208: RETURN_FALSE;
2209: }
2210: }
2211:
2212: n = mbfl_strlen(&string);
2213: if (n >= 0) {
2214: RETVAL_LONG(n);
2215: } else {
2216: RETVAL_FALSE;
2217: }
2218: }
2219: /* }}} */
2220:
2221: /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2222: Find position of first occurrence of a string within another */
2223: PHP_FUNCTION(mb_strpos)
2224: {
2225: int n, reverse = 0;
2226: long offset;
2227: mbfl_string haystack, needle;
2228: char *enc_name = NULL;
2229: int enc_name_len;
1.1.1.2 ! misho 2230:
1.1 misho 2231: mbfl_string_init(&haystack);
2232: mbfl_string_init(&needle);
2233: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2234: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2235: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2236: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2237: offset = 0;
2238:
2239: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2240: RETURN_FALSE;
2241: }
2242:
2243: if (enc_name != NULL) {
2244: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2245: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2246: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2247: RETURN_FALSE;
2248: }
2249: }
2250:
2251: if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2252: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2253: RETURN_FALSE;
2254: }
2255: if (needle.len == 0) {
2256: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2257: RETURN_FALSE;
2258: }
2259:
2260: n = mbfl_strpos(&haystack, &needle, offset, reverse);
2261: if (n >= 0) {
2262: RETVAL_LONG(n);
2263: } else {
2264: switch (-n) {
2265: case 1:
2266: break;
2267: case 2:
2268: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2269: break;
2270: case 4:
2271: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2272: break;
2273: case 8:
2274: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2275: break;
2276: default:
2277: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2278: break;
2279: }
2280: RETVAL_FALSE;
2281: }
2282: }
2283: /* }}} */
2284:
2285: /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2286: Find position of last occurrence of a string within another */
2287: PHP_FUNCTION(mb_strrpos)
2288: {
2289: int n;
2290: mbfl_string haystack, needle;
2291: char *enc_name = NULL;
2292: int enc_name_len;
2293: zval **zoffset = NULL;
2294: long offset = 0, str_flg;
2295: char *enc_name2 = NULL;
2296: int enc_name_len2;
2297:
2298: mbfl_string_init(&haystack);
2299: mbfl_string_init(&needle);
2300: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2301: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2302: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2303: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2304:
2305: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2306: RETURN_FALSE;
2307: }
2308:
2309: if (zoffset) {
2310: if (Z_TYPE_PP(zoffset) == IS_STRING) {
2311: enc_name2 = Z_STRVAL_PP(zoffset);
2312: enc_name_len2 = Z_STRLEN_PP(zoffset);
2313: str_flg = 1;
2314:
2315: if (enc_name2 != NULL) {
2316: switch (*enc_name2) {
2317: case '0':
2318: case '1':
2319: case '2':
2320: case '3':
2321: case '4':
2322: case '5':
2323: case '6':
2324: case '7':
2325: case '8':
2326: case '9':
2327: case ' ':
2328: case '-':
2329: case '.':
2330: break;
2331: default :
2332: str_flg = 0;
2333: break;
2334: }
2335: }
2336:
2337: if (str_flg) {
2338: convert_to_long_ex(zoffset);
2339: offset = Z_LVAL_PP(zoffset);
2340: } else {
2341: enc_name = enc_name2;
2342: enc_name_len = enc_name_len2;
2343: }
2344: } else {
2345: convert_to_long_ex(zoffset);
2346: offset = Z_LVAL_PP(zoffset);
2347: }
2348: }
2349:
2350: if (enc_name != NULL) {
2351: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2352: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2353: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2354: RETURN_FALSE;
2355: }
2356: }
2357:
2358: if (haystack.len <= 0) {
2359: RETURN_FALSE;
2360: }
2361: if (needle.len <= 0) {
2362: RETURN_FALSE;
2363: }
2364:
2365: {
2366: int haystack_char_len = mbfl_strlen(&haystack);
2367: if ((offset > 0 && offset > haystack_char_len) ||
2368: (offset < 0 && -offset > haystack_char_len)) {
2369: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2370: RETURN_FALSE;
2371: }
2372: }
2373:
2374: n = mbfl_strpos(&haystack, &needle, offset, 1);
2375: if (n >= 0) {
2376: RETVAL_LONG(n);
2377: } else {
2378: RETVAL_FALSE;
2379: }
2380: }
2381: /* }}} */
2382:
2383: /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2384: Finds position of first occurrence of a string within another, case insensitive */
2385: PHP_FUNCTION(mb_stripos)
2386: {
2387: int n;
2388: long offset;
2389: mbfl_string haystack, needle;
1.1.1.2 ! misho 2390: const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
1.1 misho 2391: int from_encoding_len;
2392: n = -1;
2393: offset = 0;
2394:
2395: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2396: RETURN_FALSE;
2397: }
2398: if (needle.len == 0) {
2399: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2400: RETURN_FALSE;
2401: }
2402: n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2403:
2404: if (n >= 0) {
2405: RETVAL_LONG(n);
2406: } else {
2407: RETVAL_FALSE;
2408: }
2409: }
2410: /* }}} */
2411:
2412: /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2413: Finds position of last occurrence of a string within another, case insensitive */
2414: PHP_FUNCTION(mb_strripos)
2415: {
2416: int n;
2417: long offset;
2418: mbfl_string haystack, needle;
1.1.1.2 ! misho 2419: const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
1.1 misho 2420: int from_encoding_len;
2421: n = -1;
2422: offset = 0;
2423:
2424: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2425: RETURN_FALSE;
2426: }
2427:
2428: n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2429:
2430: if (n >= 0) {
2431: RETVAL_LONG(n);
2432: } else {
2433: RETVAL_FALSE;
2434: }
2435: }
2436: /* }}} */
2437:
2438: /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2439: Finds first occurrence of a string within another */
2440: PHP_FUNCTION(mb_strstr)
2441: {
2442: int n, len, mblen;
2443: mbfl_string haystack, needle, result, *ret = NULL;
2444: char *enc_name = NULL;
2445: int enc_name_len;
2446: zend_bool part = 0;
2447:
2448: mbfl_string_init(&haystack);
2449: mbfl_string_init(&needle);
2450: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2451: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2452: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2453: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2454:
2455: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2456: RETURN_FALSE;
2457: }
2458:
2459: if (enc_name != NULL) {
2460: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2461: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2462: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2463: RETURN_FALSE;
2464: }
2465: }
2466:
2467: if (needle.len <= 0) {
2468: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2469: RETURN_FALSE;
2470: }
2471: n = mbfl_strpos(&haystack, &needle, 0, 0);
2472: if (n >= 0) {
2473: mblen = mbfl_strlen(&haystack);
2474: if (part) {
2475: ret = mbfl_substr(&haystack, &result, 0, n);
2476: if (ret != NULL) {
2477: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2478: } else {
2479: RETVAL_FALSE;
2480: }
2481: } else {
2482: len = (mblen - n);
2483: ret = mbfl_substr(&haystack, &result, n, len);
2484: if (ret != NULL) {
2485: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2486: } else {
2487: RETVAL_FALSE;
2488: }
2489: }
2490: } else {
2491: RETVAL_FALSE;
2492: }
2493: }
2494: /* }}} */
2495:
2496: /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2497: Finds the last occurrence of a character in a string within another */
2498: PHP_FUNCTION(mb_strrchr)
2499: {
2500: int n, len, mblen;
2501: mbfl_string haystack, needle, result, *ret = NULL;
2502: char *enc_name = NULL;
2503: int enc_name_len;
2504: zend_bool part = 0;
2505:
2506: mbfl_string_init(&haystack);
2507: mbfl_string_init(&needle);
2508: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2509: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2510: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2511: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2512:
2513: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2514: RETURN_FALSE;
2515: }
2516:
2517: if (enc_name != NULL) {
2518: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2519: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2520: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2521: RETURN_FALSE;
2522: }
2523: }
2524:
2525: if (haystack.len <= 0) {
2526: RETURN_FALSE;
2527: }
2528: if (needle.len <= 0) {
2529: RETURN_FALSE;
2530: }
2531: n = mbfl_strpos(&haystack, &needle, 0, 1);
2532: if (n >= 0) {
2533: mblen = mbfl_strlen(&haystack);
2534: if (part) {
2535: ret = mbfl_substr(&haystack, &result, 0, n);
2536: if (ret != NULL) {
2537: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2538: } else {
2539: RETVAL_FALSE;
2540: }
2541: } else {
2542: len = (mblen - n);
2543: ret = mbfl_substr(&haystack, &result, n, len);
2544: if (ret != NULL) {
2545: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2546: } else {
2547: RETVAL_FALSE;
2548: }
2549: }
2550: } else {
2551: RETVAL_FALSE;
2552: }
2553: }
2554: /* }}} */
2555:
2556: /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2557: Finds first occurrence of a string within another, case insensitive */
2558: PHP_FUNCTION(mb_stristr)
2559: {
2560: zend_bool part = 0;
2561: unsigned int from_encoding_len, len, mblen;
2562: int n;
2563: mbfl_string haystack, needle, result, *ret = NULL;
1.1.1.2 ! misho 2564: const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
1.1 misho 2565: mbfl_string_init(&haystack);
2566: mbfl_string_init(&needle);
2567: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2568: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2569: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2570: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2571:
2572:
2573: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2574: RETURN_FALSE;
2575: }
2576:
2577: if (!needle.len) {
2578: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2579: RETURN_FALSE;
2580: }
2581:
2582: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2583: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2584: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2585: RETURN_FALSE;
2586: }
2587:
2588: n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2589:
2590: if (n <0) {
2591: RETURN_FALSE;
2592: }
2593:
2594: mblen = mbfl_strlen(&haystack);
2595:
2596: if (part) {
2597: ret = mbfl_substr(&haystack, &result, 0, n);
2598: if (ret != NULL) {
2599: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2600: } else {
2601: RETVAL_FALSE;
2602: }
2603: } else {
2604: len = (mblen - n);
2605: ret = mbfl_substr(&haystack, &result, n, len);
2606: if (ret != NULL) {
2607: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2608: } else {
2609: RETVAL_FALSE;
2610: }
2611: }
2612: }
2613: /* }}} */
2614:
2615: /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2616: Finds the last occurrence of a character in a string within another, case insensitive */
2617: PHP_FUNCTION(mb_strrichr)
2618: {
2619: zend_bool part = 0;
2620: int n, from_encoding_len, len, mblen;
2621: mbfl_string haystack, needle, result, *ret = NULL;
1.1.1.2 ! misho 2622: const char *from_encoding = MBSTRG(current_internal_encoding)->name;
1.1 misho 2623: mbfl_string_init(&haystack);
2624: mbfl_string_init(&needle);
2625: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2626: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2627: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2628: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2629:
2630:
2631: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2632: RETURN_FALSE;
2633: }
2634:
2635: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2636: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2637: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2638: RETURN_FALSE;
2639: }
2640:
2641: n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2642:
2643: if (n <0) {
2644: RETURN_FALSE;
2645: }
2646:
2647: mblen = mbfl_strlen(&haystack);
2648:
2649: if (part) {
2650: ret = mbfl_substr(&haystack, &result, 0, n);
2651: if (ret != NULL) {
2652: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2653: } else {
2654: RETVAL_FALSE;
2655: }
2656: } else {
2657: len = (mblen - n);
2658: ret = mbfl_substr(&haystack, &result, n, len);
2659: if (ret != NULL) {
2660: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2661: } else {
2662: RETVAL_FALSE;
2663: }
2664: }
2665: }
2666: /* }}} */
2667:
2668: /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2669: Count the number of substring occurrences */
2670: PHP_FUNCTION(mb_substr_count)
2671: {
2672: int n;
2673: mbfl_string haystack, needle;
2674: char *enc_name = NULL;
2675: int enc_name_len;
2676:
2677: mbfl_string_init(&haystack);
2678: mbfl_string_init(&needle);
2679: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 2680: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2681: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 2682: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2683:
2684: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2685: return;
2686: }
2687:
2688: if (enc_name != NULL) {
2689: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2690: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2691: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2692: RETURN_FALSE;
2693: }
2694: }
2695:
2696: if (needle.len <= 0) {
2697: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2698: RETURN_FALSE;
2699: }
2700:
2701: n = mbfl_substr_count(&haystack, &needle);
2702: if (n >= 0) {
2703: RETVAL_LONG(n);
2704: } else {
2705: RETVAL_FALSE;
2706: }
2707: }
2708: /* }}} */
2709:
2710: /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2711: Returns part of a string */
2712: PHP_FUNCTION(mb_substr)
2713: {
2714: size_t argc = ZEND_NUM_ARGS();
2715: char *str, *encoding;
2716: long from, len;
2717: int mblen, str_len, encoding_len;
2718: mbfl_string string, result, *ret;
2719:
2720: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2721: return;
2722: }
2723:
2724: mbfl_string_init(&string);
2725: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 2726: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2727:
2728: if (argc == 4) {
2729: string.no_encoding = mbfl_name2no_encoding(encoding);
2730: if (string.no_encoding == mbfl_no_encoding_invalid) {
2731: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2732: RETURN_FALSE;
2733: }
2734: }
2735:
2736: string.val = (unsigned char *)str;
2737: string.len = str_len;
2738:
2739: if (argc < 3) {
2740: len = str_len;
2741: }
2742:
2743: /* measures length */
2744: mblen = 0;
2745: if (from < 0 || len < 0) {
2746: mblen = mbfl_strlen(&string);
2747: }
2748:
2749: /* if "from" position is negative, count start position from the end
2750: * of the string
2751: */
2752: if (from < 0) {
2753: from = mblen + from;
2754: if (from < 0) {
2755: from = 0;
2756: }
2757: }
2758:
2759: /* if "length" position is negative, set it to the length
2760: * needed to stop that many chars from the end of the string
2761: */
2762: if (len < 0) {
2763: len = (mblen - from) + len;
2764: if (len < 0) {
2765: len = 0;
2766: }
2767: }
2768:
2769: if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2770: && (from >= mbfl_strlen(&string))) {
2771: RETURN_FALSE;
2772: }
2773:
2774: ret = mbfl_substr(&string, &result, from, len);
2775: if (NULL == ret) {
2776: RETURN_FALSE;
2777: }
2778:
2779: RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2780: }
2781: /* }}} */
2782:
2783: /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2784: Returns part of a string */
2785: PHP_FUNCTION(mb_strcut)
2786: {
2787: size_t argc = ZEND_NUM_ARGS();
2788: char *encoding;
2789: long from, len;
2790: int encoding_len;
2791: mbfl_string string, result, *ret;
2792:
2793: mbfl_string_init(&string);
2794: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 2795: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2796:
2797: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2798: return;
2799: }
2800:
2801: if (argc == 4) {
2802: string.no_encoding = mbfl_name2no_encoding(encoding);
2803: if (string.no_encoding == mbfl_no_encoding_invalid) {
2804: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2805: RETURN_FALSE;
2806: }
2807: }
2808:
2809: if (argc < 3) {
2810: len = string.len;
2811: }
2812:
2813: /* if "from" position is negative, count start position from the end
2814: * of the string
2815: */
2816: if (from < 0) {
2817: from = string.len + from;
2818: if (from < 0) {
2819: from = 0;
2820: }
2821: }
2822:
2823: /* if "length" position is negative, set it to the length
2824: * needed to stop that many chars from the end of the string
2825: */
2826: if (len < 0) {
2827: len = (string.len - from) + len;
2828: if (len < 0) {
2829: len = 0;
2830: }
2831: }
2832:
2833: if ((unsigned int)from > string.len) {
2834: RETURN_FALSE;
2835: }
2836:
2837: ret = mbfl_strcut(&string, &result, from, len);
2838: if (ret == NULL) {
2839: RETURN_FALSE;
2840: }
2841:
2842: RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2843: }
2844: /* }}} */
2845:
2846: /* {{{ proto int mb_strwidth(string str [, string encoding])
2847: Gets terminal width of a string */
2848: PHP_FUNCTION(mb_strwidth)
2849: {
2850: int n;
2851: mbfl_string string;
2852: char *enc_name = NULL;
2853: int enc_name_len;
2854:
2855: mbfl_string_init(&string);
2856:
2857: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 2858: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2859:
2860: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2861: return;
2862: }
2863:
2864: if (enc_name != NULL) {
2865: string.no_encoding = mbfl_name2no_encoding(enc_name);
2866: if (string.no_encoding == mbfl_no_encoding_invalid) {
2867: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2868: RETURN_FALSE;
2869: }
2870: }
2871:
2872: n = mbfl_strwidth(&string);
2873: if (n >= 0) {
2874: RETVAL_LONG(n);
2875: } else {
2876: RETVAL_FALSE;
2877: }
2878: }
2879: /* }}} */
2880:
2881: /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2882: Trim the string in terminal width */
2883: PHP_FUNCTION(mb_strimwidth)
2884: {
2885: char *str, *trimmarker, *encoding;
2886: long from, width;
2887: int str_len, trimmarker_len, encoding_len;
2888: mbfl_string string, result, marker, *ret;
2889:
2890: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2891: return;
2892: }
2893:
2894: mbfl_string_init(&string);
2895: mbfl_string_init(&marker);
2896: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 2897: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2898: marker.no_language = MBSTRG(language);
1.1.1.2 ! misho 2899: marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 2900: marker.val = NULL;
2901: marker.len = 0;
2902:
2903: if (ZEND_NUM_ARGS() == 5) {
2904: string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2905: if (string.no_encoding == mbfl_no_encoding_invalid) {
2906: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2907: RETURN_FALSE;
2908: }
2909: }
2910:
2911: string.val = (unsigned char *)str;
2912: string.len = str_len;
2913:
2914: if (from < 0 || from > str_len) {
2915: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2916: RETURN_FALSE;
2917: }
2918:
2919: if (width < 0) {
2920: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2921: RETURN_FALSE;
2922: }
2923:
2924: if (ZEND_NUM_ARGS() >= 4) {
2925: marker.val = (unsigned char *)trimmarker;
2926: marker.len = trimmarker_len;
2927: }
2928:
2929: ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2930:
2931: if (ret == NULL) {
2932: RETURN_FALSE;
2933: }
2934:
2935: RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2936: }
2937: /* }}} */
2938:
2939: /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2940: MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2941: {
2942: mbfl_string string, result, *ret;
1.1.1.2 ! misho 2943: const mbfl_encoding *from_encoding, *to_encoding;
1.1 misho 2944: mbfl_buffer_converter *convd;
1.1.1.2 ! misho 2945: size_t size;
! 2946: const mbfl_encoding **list;
1.1 misho 2947: char *output=NULL;
2948:
2949: if (output_len) {
2950: *output_len = 0;
2951: }
2952: if (!input) {
2953: return NULL;
2954: }
2955: /* new encoding */
2956: if (_to_encoding && strlen(_to_encoding)) {
1.1.1.2 ! misho 2957: to_encoding = mbfl_name2encoding(_to_encoding);
! 2958: if (!to_encoding) {
1.1 misho 2959: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2960: return NULL;
2961: }
2962: } else {
2963: to_encoding = MBSTRG(current_internal_encoding);
2964: }
2965:
2966: /* initialize string */
2967: mbfl_string_init(&string);
2968: mbfl_string_init(&result);
2969: from_encoding = MBSTRG(current_internal_encoding);
1.1.1.2 ! misho 2970: string.no_encoding = from_encoding->no_encoding;
1.1 misho 2971: string.no_language = MBSTRG(language);
2972: string.val = (unsigned char *)input;
2973: string.len = length;
2974:
2975: /* pre-conversion encoding */
2976: if (_from_encodings) {
2977: list = NULL;
2978: size = 0;
1.1.1.2 ! misho 2979: php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
1.1 misho 2980: if (size == 1) {
2981: from_encoding = *list;
1.1.1.2 ! misho 2982: string.no_encoding = from_encoding->no_encoding;
1.1 misho 2983: } else if (size > 1) {
2984: /* auto detect */
1.1.1.2 ! misho 2985: from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
! 2986: if (from_encoding) {
! 2987: string.no_encoding = from_encoding->no_encoding;
1.1 misho 2988: } else {
2989: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
1.1.1.2 ! misho 2990: from_encoding = &mbfl_encoding_pass;
1.1 misho 2991: to_encoding = from_encoding;
1.1.1.2 ! misho 2992: string.no_encoding = from_encoding->no_encoding;
1.1 misho 2993: }
2994: } else {
2995: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
2996: }
2997: if (list != NULL) {
2998: efree((void *)list);
2999: }
3000: }
3001:
3002: /* initialize converter */
1.1.1.2 ! misho 3003: convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
1.1 misho 3004: if (convd == NULL) {
3005: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3006: return NULL;
3007: }
3008: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3009: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3010:
3011: /* do it */
3012: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3013: if (ret) {
3014: if (output_len) {
3015: *output_len = ret->len;
3016: }
3017: output = (char *)ret->val;
3018: }
3019:
3020: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3021: mbfl_buffer_converter_delete(convd);
3022: return output;
3023: }
3024: /* }}} */
3025:
3026: /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3027: Returns converted string in desired encoding */
3028: PHP_FUNCTION(mb_convert_encoding)
3029: {
3030: char *arg_str, *arg_new;
3031: int str_len, new_len;
3032: zval *arg_old;
3033: int i;
3034: size_t size, l, n;
3035: char *_from_encodings = NULL, *ret, *s_free = NULL;
3036:
3037: zval **hash_entry;
3038: HashTable *target_hash;
3039:
3040: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3041: return;
3042: }
3043:
3044: if (ZEND_NUM_ARGS() == 3) {
3045: switch (Z_TYPE_P(arg_old)) {
3046: case IS_ARRAY:
3047: target_hash = Z_ARRVAL_P(arg_old);
3048: zend_hash_internal_pointer_reset(target_hash);
3049: i = zend_hash_num_elements(target_hash);
3050: _from_encodings = NULL;
3051:
3052: while (i > 0) {
3053: if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3054: break;
3055: }
3056:
3057: convert_to_string_ex(hash_entry);
3058:
3059: if ( _from_encodings) {
3060: l = strlen(_from_encodings);
3061: n = strlen(Z_STRVAL_PP(hash_entry));
3062: _from_encodings = erealloc(_from_encodings, l+n+2);
3063: strcpy(_from_encodings+l, ",");
3064: strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3065: } else {
3066: _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3067: }
3068:
3069: zend_hash_move_forward(target_hash);
3070: i--;
3071: }
3072:
3073: if (_from_encodings != NULL && !strlen(_from_encodings)) {
3074: efree(_from_encodings);
3075: _from_encodings = NULL;
3076: }
3077: s_free = _from_encodings;
3078: break;
3079: default:
3080: convert_to_string(arg_old);
3081: _from_encodings = Z_STRVAL_P(arg_old);
3082: break;
3083: }
3084: }
3085:
3086: /* new encoding */
3087: ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3088: if (ret != NULL) {
3089: RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
3090: } else {
3091: RETVAL_FALSE;
3092: }
3093:
3094: if ( s_free) {
3095: efree(s_free);
3096: }
3097: }
3098: /* }}} */
3099:
3100: /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3101: Returns a case-folded version of sourcestring */
3102: PHP_FUNCTION(mb_convert_case)
3103: {
1.1.1.2 ! misho 3104: const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
! 3105: char *str;
1.1 misho 3106: int str_len, from_encoding_len;
3107: long case_mode = 0;
3108: char *newstr;
3109: size_t ret_len;
3110:
3111: RETVAL_FALSE;
3112: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3113: &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3114: RETURN_FALSE;
3115:
3116: newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3117:
3118: if (newstr) {
3119: RETVAL_STRINGL(newstr, ret_len, 0);
3120: }
3121: }
3122: /* }}} */
3123:
3124: /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3125: * Returns a uppercased version of sourcestring
3126: */
3127: PHP_FUNCTION(mb_strtoupper)
3128: {
1.1.1.2 ! misho 3129: const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
! 3130: char *str;
1.1 misho 3131: int str_len, from_encoding_len;
3132: char *newstr;
3133: size_t ret_len;
3134:
3135: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3136: &from_encoding, &from_encoding_len) == FAILURE) {
3137: return;
3138: }
3139: newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3140:
3141: if (newstr) {
3142: RETURN_STRINGL(newstr, ret_len, 0);
3143: }
3144: RETURN_FALSE;
3145: }
3146: /* }}} */
3147:
3148: /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3149: * Returns a lowercased version of sourcestring
3150: */
3151: PHP_FUNCTION(mb_strtolower)
3152: {
1.1.1.2 ! misho 3153: const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
! 3154: char *str;
1.1 misho 3155: int str_len, from_encoding_len;
3156: char *newstr;
3157: size_t ret_len;
3158:
3159: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3160: &from_encoding, &from_encoding_len) == FAILURE) {
3161: return;
3162: }
3163: newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3164:
3165: if (newstr) {
3166: RETURN_STRINGL(newstr, ret_len, 0);
3167: }
3168: RETURN_FALSE;
3169: }
3170: /* }}} */
3171:
3172: /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3173: Encodings of the given string is returned (as a string) */
3174: PHP_FUNCTION(mb_detect_encoding)
3175: {
3176: char *str;
3177: int str_len;
3178: zend_bool strict=0;
3179: zval *encoding_list;
3180:
3181: mbfl_string string;
1.1.1.2 ! misho 3182: const mbfl_encoding *ret;
! 3183: const mbfl_encoding **elist, **list;
! 3184: size_t size;
1.1 misho 3185:
3186: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3187: return;
3188: }
3189:
3190: /* make encoding list */
3191: list = NULL;
3192: size = 0;
3193: if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3194: switch (Z_TYPE_P(encoding_list)) {
3195: case IS_ARRAY:
1.1.1.2 ! misho 3196: if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
1.1 misho 3197: if (list) {
3198: efree(list);
3199: list = NULL;
3200: size = 0;
3201: }
3202: }
3203: break;
3204: default:
3205: convert_to_string(encoding_list);
1.1.1.2 ! misho 3206: if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
1.1 misho 3207: if (list) {
3208: efree(list);
3209: list = NULL;
3210: size = 0;
3211: }
3212: }
3213: break;
3214: }
3215: if (size <= 0) {
3216: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3217: }
3218: }
3219:
3220: if (ZEND_NUM_ARGS() < 3) {
3221: strict = (zend_bool)MBSTRG(strict_detection);
3222: }
3223:
3224: if (size > 0 && list != NULL) {
3225: elist = list;
3226: } else {
3227: elist = MBSTRG(current_detect_order_list);
3228: size = MBSTRG(current_detect_order_list_size);
3229: }
3230:
3231: mbfl_string_init(&string);
3232: string.no_language = MBSTRG(language);
3233: string.val = (unsigned char *)str;
3234: string.len = str_len;
1.1.1.2 ! misho 3235: ret = mbfl_identify_encoding2(&string, elist, size, strict);
1.1 misho 3236:
3237: if (list != NULL) {
3238: efree((void *)list);
3239: }
3240:
3241: if (ret == NULL) {
3242: RETURN_FALSE;
3243: }
3244:
1.1.1.2 ! misho 3245: RETVAL_STRING((char *)ret->name, 1);
1.1 misho 3246: }
3247: /* }}} */
3248:
3249: /* {{{ proto mixed mb_list_encodings()
3250: Returns an array of all supported entity encodings */
3251: PHP_FUNCTION(mb_list_encodings)
3252: {
3253: const mbfl_encoding **encodings;
3254: const mbfl_encoding *encoding;
3255: int i;
3256:
3257: array_init(return_value);
3258: i = 0;
3259: encodings = mbfl_get_supported_encodings();
3260: while ((encoding = encodings[i++]) != NULL) {
3261: add_next_index_string(return_value, (char *) encoding->name, 1);
3262: }
3263: }
3264: /* }}} */
3265:
3266: /* {{{ proto array mb_encoding_aliases(string encoding)
3267: Returns an array of the aliases of a given encoding name */
3268: PHP_FUNCTION(mb_encoding_aliases)
3269: {
3270: const mbfl_encoding *encoding;
3271: char *name = NULL;
3272: int name_len;
3273:
3274: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3275: RETURN_FALSE;
3276: }
3277:
3278: encoding = mbfl_name2encoding(name);
3279: if (!encoding) {
3280: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3281: RETURN_FALSE;
3282: }
3283:
3284: array_init(return_value);
3285: if (encoding->aliases != NULL) {
3286: const char **alias;
3287: for (alias = *encoding->aliases; *alias; ++alias) {
3288: add_next_index_string(return_value, (char *)*alias, 1);
3289: }
3290: }
3291: }
3292: /* }}} */
3293:
3294: /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3295: Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3296: PHP_FUNCTION(mb_encode_mimeheader)
3297: {
3298: enum mbfl_no_encoding charset, transenc;
3299: mbfl_string string, result, *ret;
3300: char *charset_name = NULL;
3301: int charset_name_len;
3302: char *trans_enc_name = NULL;
3303: int trans_enc_name_len;
3304: char *linefeed = "\r\n";
3305: int linefeed_len;
3306: long indent = 0;
3307:
3308: mbfl_string_init(&string);
3309: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 3310: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 3311:
3312: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3313: return;
3314: }
3315:
3316: charset = mbfl_no_encoding_pass;
3317: transenc = mbfl_no_encoding_base64;
3318:
3319: if (charset_name != NULL) {
3320: charset = mbfl_name2no_encoding(charset_name);
3321: if (charset == mbfl_no_encoding_invalid) {
3322: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3323: RETURN_FALSE;
3324: }
3325: } else {
3326: const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3327: if (lang != NULL) {
3328: charset = lang->mail_charset;
3329: transenc = lang->mail_header_encoding;
3330: }
3331: }
3332:
3333: if (trans_enc_name != NULL) {
3334: if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3335: transenc = mbfl_no_encoding_base64;
3336: } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3337: transenc = mbfl_no_encoding_qprint;
3338: }
3339: }
3340:
3341: mbfl_string_init(&result);
3342: ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3343: if (ret != NULL) {
1.1.1.2 ! misho 3344: RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
1.1 misho 3345: } else {
3346: RETVAL_FALSE;
3347: }
3348: }
3349: /* }}} */
3350:
3351: /* {{{ proto string mb_decode_mimeheader(string string)
3352: Decodes the MIME "encoded-word" in the string */
3353: PHP_FUNCTION(mb_decode_mimeheader)
3354: {
3355: mbfl_string string, result, *ret;
3356:
3357: mbfl_string_init(&string);
3358: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 3359: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 3360:
3361: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3362: return;
3363: }
3364:
3365: mbfl_string_init(&result);
1.1.1.2 ! misho 3366: ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
1.1 misho 3367: if (ret != NULL) {
1.1.1.2 ! misho 3368: RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
1.1 misho 3369: } else {
3370: RETVAL_FALSE;
3371: }
3372: }
3373: /* }}} */
3374:
3375: /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3376: Conversion between full-width character and half-width character (Japanese) */
3377: PHP_FUNCTION(mb_convert_kana)
3378: {
3379: int opt, i;
3380: mbfl_string string, result, *ret;
3381: char *optstr = NULL;
3382: int optstr_len;
3383: char *encname = NULL;
3384: int encname_len;
3385:
3386: mbfl_string_init(&string);
3387: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 3388: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 3389:
3390: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3391: return;
3392: }
3393:
3394: /* option */
3395: if (optstr != NULL) {
3396: char *p = optstr;
3397: int n = optstr_len;
3398: i = 0;
3399: opt = 0;
3400: while (i < n) {
3401: i++;
3402: switch (*p++) {
3403: case 'A':
3404: opt |= 0x1;
3405: break;
3406: case 'a':
3407: opt |= 0x10;
3408: break;
3409: case 'R':
3410: opt |= 0x2;
3411: break;
3412: case 'r':
3413: opt |= 0x20;
3414: break;
3415: case 'N':
3416: opt |= 0x4;
3417: break;
3418: case 'n':
3419: opt |= 0x40;
3420: break;
3421: case 'S':
3422: opt |= 0x8;
3423: break;
3424: case 's':
3425: opt |= 0x80;
3426: break;
3427: case 'K':
3428: opt |= 0x100;
3429: break;
3430: case 'k':
3431: opt |= 0x1000;
3432: break;
3433: case 'H':
3434: opt |= 0x200;
3435: break;
3436: case 'h':
3437: opt |= 0x2000;
3438: break;
3439: case 'V':
3440: opt |= 0x800;
3441: break;
3442: case 'C':
3443: opt |= 0x10000;
3444: break;
3445: case 'c':
3446: opt |= 0x20000;
3447: break;
3448: case 'M':
3449: opt |= 0x100000;
3450: break;
3451: case 'm':
3452: opt |= 0x200000;
3453: break;
3454: }
3455: }
3456: } else {
3457: opt = 0x900;
3458: }
3459:
3460: /* encoding */
3461: if (encname != NULL) {
3462: string.no_encoding = mbfl_name2no_encoding(encname);
3463: if (string.no_encoding == mbfl_no_encoding_invalid) {
3464: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3465: RETURN_FALSE;
3466: }
3467: }
3468:
3469: ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3470: if (ret != NULL) {
3471: RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3472: } else {
3473: RETVAL_FALSE;
3474: }
3475: }
3476: /* }}} */
3477:
3478: #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3479:
3480: /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3481: Converts the string resource in variables to desired encoding */
3482: PHP_FUNCTION(mb_convert_variables)
3483: {
3484: zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3485: HashTable *target_hash;
3486: mbfl_string string, result, *ret;
1.1.1.2 ! misho 3487: const mbfl_encoding *from_encoding, *to_encoding;
1.1 misho 3488: mbfl_encoding_detector *identd;
3489: mbfl_buffer_converter *convd;
1.1.1.2 ! misho 3490: int n, to_enc_len, argc, stack_level, stack_max;
! 3491: size_t elistsz;
! 3492: const mbfl_encoding **elist;
! 3493: char *to_enc;
1.1 misho 3494: void *ptmp;
1.1.1.2 ! misho 3495:
1.1 misho 3496: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3497: return;
3498: }
3499:
3500: /* new encoding */
1.1.1.2 ! misho 3501: to_encoding = mbfl_name2encoding(to_enc);
! 3502: if (!to_encoding) {
1.1 misho 3503: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3504: efree(args);
3505: RETURN_FALSE;
3506: }
3507:
3508: /* initialize string */
3509: mbfl_string_init(&string);
3510: mbfl_string_init(&result);
3511: from_encoding = MBSTRG(current_internal_encoding);
1.1.1.2 ! misho 3512: string.no_encoding = from_encoding->no_encoding;
1.1 misho 3513: string.no_language = MBSTRG(language);
3514:
3515: /* pre-conversion encoding */
3516: elist = NULL;
3517: elistsz = 0;
3518: switch (Z_TYPE_PP(zfrom_enc)) {
3519: case IS_ARRAY:
3520: php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3521: break;
3522: default:
3523: convert_to_string_ex(zfrom_enc);
3524: php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3525: break;
3526: }
3527: if (elistsz <= 0) {
1.1.1.2 ! misho 3528: from_encoding = &mbfl_encoding_pass;
1.1 misho 3529: } else if (elistsz == 1) {
3530: from_encoding = *elist;
3531: } else {
3532: /* auto detect */
1.1.1.2 ! misho 3533: from_encoding = NULL;
1.1 misho 3534: stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3535: stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3536: stack_level = 0;
1.1.1.2 ! misho 3537: identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
1.1 misho 3538: if (identd != NULL) {
3539: n = 0;
3540: while (n < argc || stack_level > 0) {
3541: if (stack_level <= 0) {
3542: var = args[n++];
3543: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3544: target_hash = HASH_OF(*var);
3545: if (target_hash != NULL) {
3546: zend_hash_internal_pointer_reset(target_hash);
3547: }
3548: }
3549: } else {
3550: stack_level--;
3551: var = stack[stack_level];
3552: }
3553: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3554: target_hash = HASH_OF(*var);
3555: if (target_hash != NULL) {
3556: while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3557: zend_hash_move_forward(target_hash);
3558: if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3559: if (stack_level >= stack_max) {
3560: stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3561: ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3562: stack = (zval ***)ptmp;
3563: }
3564: stack[stack_level] = var;
3565: stack_level++;
3566: var = hash_entry;
3567: target_hash = HASH_OF(*var);
3568: if (target_hash != NULL) {
3569: zend_hash_internal_pointer_reset(target_hash);
3570: continue;
3571: }
3572: } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3573: string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3574: string.len = Z_STRLEN_PP(hash_entry);
3575: if (mbfl_encoding_detector_feed(identd, &string)) {
3576: goto detect_end; /* complete detecting */
3577: }
3578: }
3579: }
3580: }
3581: } else if (Z_TYPE_PP(var) == IS_STRING) {
3582: string.val = (unsigned char *)Z_STRVAL_PP(var);
3583: string.len = Z_STRLEN_PP(var);
3584: if (mbfl_encoding_detector_feed(identd, &string)) {
3585: goto detect_end; /* complete detecting */
3586: }
3587: }
3588: }
3589: detect_end:
1.1.1.2 ! misho 3590: from_encoding = mbfl_encoding_detector_judge2(identd);
1.1 misho 3591: mbfl_encoding_detector_delete(identd);
3592: }
3593: efree(stack);
3594:
1.1.1.2 ! misho 3595: if (!from_encoding) {
1.1 misho 3596: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
1.1.1.2 ! misho 3597: from_encoding = &mbfl_encoding_pass;
1.1 misho 3598: }
3599: }
3600: if (elist != NULL) {
3601: efree((void *)elist);
3602: }
3603: /* create converter */
3604: convd = NULL;
1.1.1.2 ! misho 3605: if (from_encoding != &mbfl_encoding_pass) {
! 3606: convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
1.1 misho 3607: if (convd == NULL) {
3608: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3609: RETURN_FALSE;
3610: }
3611: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3612: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3613: }
3614:
3615: /* convert */
3616: if (convd != NULL) {
3617: stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3618: stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3619: stack_level = 0;
3620: n = 0;
3621: while (n < argc || stack_level > 0) {
3622: if (stack_level <= 0) {
3623: var = args[n++];
3624: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3625: target_hash = HASH_OF(*var);
3626: if (target_hash != NULL) {
3627: zend_hash_internal_pointer_reset(target_hash);
3628: }
3629: }
3630: } else {
3631: stack_level--;
3632: var = stack[stack_level];
3633: }
3634: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3635: target_hash = HASH_OF(*var);
3636: if (target_hash != NULL) {
3637: while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3638: zend_hash_move_forward(target_hash);
3639: if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3640: if (stack_level >= stack_max) {
3641: stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3642: ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3643: stack = (zval ***)ptmp;
3644: }
3645: stack[stack_level] = var;
3646: stack_level++;
3647: var = hash_entry;
3648: SEPARATE_ZVAL(hash_entry);
3649: target_hash = HASH_OF(*var);
3650: if (target_hash != NULL) {
3651: zend_hash_internal_pointer_reset(target_hash);
3652: continue;
3653: }
3654: } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3655: string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3656: string.len = Z_STRLEN_PP(hash_entry);
3657: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3658: if (ret != NULL) {
3659: if (Z_REFCOUNT_PP(hash_entry) > 1) {
3660: Z_DELREF_PP(hash_entry);
3661: MAKE_STD_ZVAL(*hash_entry);
3662: } else {
3663: zval_dtor(*hash_entry);
3664: }
3665: ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3666: }
3667: }
3668: }
3669: }
3670: } else if (Z_TYPE_PP(var) == IS_STRING) {
3671: string.val = (unsigned char *)Z_STRVAL_PP(var);
3672: string.len = Z_STRLEN_PP(var);
3673: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3674: if (ret != NULL) {
3675: zval_dtor(*var);
3676: ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3677: }
3678: }
3679: }
3680: efree(stack);
3681:
3682: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3683: mbfl_buffer_converter_delete(convd);
3684: }
3685:
3686: efree(args);
3687:
1.1.1.2 ! misho 3688: if (from_encoding) {
! 3689: RETURN_STRING(from_encoding->name, 1);
1.1 misho 3690: } else {
3691: RETURN_FALSE;
3692: }
3693: }
3694: /* }}} */
3695:
3696: /* {{{ HTML numeric entity */
3697: /* {{{ static void php_mb_numericentity_exec() */
3698: static void
3699: php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3700: {
3701: char *str, *encoding;
3702: int str_len, encoding_len;
3703: zval *zconvmap, **hash_entry;
3704: HashTable *target_hash;
3705: size_t argc = ZEND_NUM_ARGS();
3706: int i, *convmap, *mapelm, mapsize=0;
1.1.1.2 ! misho 3707: zend_bool is_hex = 0;
1.1 misho 3708: mbfl_string string, result, *ret;
3709: enum mbfl_no_encoding no_encoding;
3710:
1.1.1.2 ! misho 3711: if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
1.1 misho 3712: return;
3713: }
3714:
3715: mbfl_string_init(&string);
3716: string.no_language = MBSTRG(language);
1.1.1.2 ! misho 3717: string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 3718: string.val = (unsigned char *)str;
3719: string.len = str_len;
3720:
3721: /* encoding */
1.1.1.2 ! misho 3722: if ((argc == 3 || argc == 4) && encoding_len > 0) {
1.1 misho 3723: no_encoding = mbfl_name2no_encoding(encoding);
3724: if (no_encoding == mbfl_no_encoding_invalid) {
3725: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3726: RETURN_FALSE;
3727: } else {
3728: string.no_encoding = no_encoding;
3729: }
3730: }
3731:
1.1.1.2 ! misho 3732: if (argc == 4) {
! 3733: if (type == 0 && is_hex) {
! 3734: type = 2; /* output in hex format */
! 3735: }
! 3736: }
! 3737:
1.1 misho 3738: /* conversion map */
3739: convmap = NULL;
3740: if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3741: target_hash = Z_ARRVAL_P(zconvmap);
3742: zend_hash_internal_pointer_reset(target_hash);
3743: i = zend_hash_num_elements(target_hash);
3744: if (i > 0) {
3745: convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3746: mapelm = convmap;
3747: mapsize = 0;
3748: while (i > 0) {
3749: if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3750: break;
3751: }
3752: convert_to_long_ex(hash_entry);
3753: *mapelm++ = Z_LVAL_PP(hash_entry);
3754: mapsize++;
3755: i--;
3756: zend_hash_move_forward(target_hash);
3757: }
3758: }
3759: }
3760: if (convmap == NULL) {
3761: RETURN_FALSE;
3762: }
3763: mapsize /= 4;
3764:
3765: ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3766: if (ret != NULL) {
3767: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3768: } else {
3769: RETVAL_FALSE;
3770: }
3771: efree((void *)convmap);
3772: }
3773: /* }}} */
3774:
1.1.1.2 ! misho 3775: /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
1.1 misho 3776: Converts specified characters to HTML numeric entities */
3777: PHP_FUNCTION(mb_encode_numericentity)
3778: {
3779: php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3780: }
3781: /* }}} */
3782:
3783: /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3784: Converts HTML numeric entities to character code */
3785: PHP_FUNCTION(mb_decode_numericentity)
3786: {
3787: php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3788: }
3789: /* }}} */
3790: /* }}} */
3791:
3792: /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3793: * Sends an email message with MIME scheme
3794: */
3795:
/*
 * SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)
 *
 * For use inside a loop that walks `str` by index `pos`. When str[pos] starts
 * a folded-header separator (CR LF followed by a space or tab), advance `pos`
 * to the last character of that whitespace run and `continue` the caller's
 * loop so the separator is left untouched.
 *
 * This macro is intentionally NOT wrapped in do { } while (0): the `continue`
 * has to act on the caller's loop. Because it expands to a bare `if`, always
 * use it inside a braced block.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}

/*
 * MAIL_ASCIIZ_CHECK_MBSTRING(str, len)
 *
 * Replaces every NUL byte in the first `len` bytes of `str` with a space.
 * Uses the caller's `char *pp, *ee` scratch variables, which must be in scope.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do { \
		pp = (str); \
		ee = pp + (len); \
		while ((pp = memchr(pp, '\0', (ee - pp)))) { \
			*pp = ' '; \
		} \
	} while (0)

/*
 * APPEND_ONE_CHAR(ch)
 *
 * Grows the caller's `token` by one character. While the token has no private
 * buffer (a == 0) only its length is bumped; once it owns a buffer the
 * character is really appended.
 */
#define APPEND_ONE_CHAR(ch) do { \
		if (token.a > 0) { \
			smart_str_appendc(&token, ch); \
		} else {\
			token.len++; \
		} \
	} while (0)

/*
 * SEPARATE_SMART_STR(str)
 *
 * Gives a smart_str that has no private buffer (a == 0) its own copy of the
 * bytes it points at. The capacity is the smallest power of two >= len, with
 * one extra byte allocated beyond it. Does nothing if the string already owns
 * a buffer.
 */
#define SEPARATE_SMART_STR(str) do {\
		if ((str)->a == 0) { \
			char *tmp_ptr; \
			(str)->a = 1; \
			while ((str)->a < (str)->len) { \
				(str)->a <<= 1; \
			} \
			tmp_ptr = emalloc((str)->a + 1); \
			memcpy(tmp_ptr, (str)->c, (str)->len); \
			(str)->c = tmp_ptr; \
		} \
	} while (0)
3832:
3833: static void my_smart_str_dtor(smart_str *s)
3834: {
3835: if (s->a > 0) {
3836: smart_str_free(s);
3837: }
3838: }
3839:
3840: static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3841: {
3842: const char *ps;
3843: size_t icnt;
3844: int state = 0;
3845: int crlf_state = -1;
3846:
3847: smart_str token = { 0, 0, 0 };
3848: smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3849:
3850: ps = str;
3851: icnt = str_len;
3852:
3853: /*
3854: * C o n t e n t - T y p e : t e x t / h t m l \r\n
3855: * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3856: * state 0 1 2 3
3857: *
3858: * C o n t e n t - T y p e : t e x t / h t m l \r\n
3859: * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3860: * crlf_state -1 0 1 -1
3861: *
3862: */
3863:
3864: while (icnt > 0) {
3865: switch (*ps) {
3866: case ':':
3867: if (crlf_state == 1) {
3868: APPEND_ONE_CHAR('\r');
3869: }
3870:
3871: if (state == 0 || state == 1) {
3872: fld_name = token;
3873:
3874: state = 2;
3875: } else {
3876: APPEND_ONE_CHAR(*ps);
3877: }
3878:
3879: crlf_state = 0;
3880: break;
3881:
3882: case '\n':
3883: if (crlf_state == -1) {
3884: goto out;
3885: }
3886: crlf_state = -1;
3887: break;
3888:
3889: case '\r':
3890: if (crlf_state == 1) {
3891: APPEND_ONE_CHAR('\r');
3892: } else {
3893: crlf_state = 1;
3894: }
3895: break;
3896:
3897: case ' ': case '\t':
3898: if (crlf_state == -1) {
3899: if (state == 3) {
3900: /* continuing from the previous line */
3901: SEPARATE_SMART_STR(&token);
3902: state = 4;
3903: } else {
3904: /* simply skipping this new line */
3905: state = 5;
3906: }
3907: } else {
3908: if (crlf_state == 1) {
3909: APPEND_ONE_CHAR('\r');
3910: }
3911: if (state == 1 || state == 3) {
3912: APPEND_ONE_CHAR(*ps);
3913: }
3914: }
3915: crlf_state = 0;
3916: break;
3917:
3918: default:
3919: switch (state) {
3920: case 0:
3921: token.c = (char *)ps;
3922: token.len = 0;
3923: token.a = 0;
3924: state = 1;
3925: break;
3926:
3927: case 2:
3928: if (crlf_state != -1) {
3929: token.c = (char *)ps;
3930: token.len = 0;
3931: token.a = 0;
3932:
3933: state = 3;
3934: break;
3935: }
3936: /* break is missing intentionally */
3937:
3938: case 3:
3939: if (crlf_state == -1) {
3940: fld_val = token;
3941:
3942: if (fld_name.c != NULL && fld_val.c != NULL) {
3943: char *dummy;
3944:
3945: /* FIXME: some locale free implementation is
3946: * really required here,,, */
3947: SEPARATE_SMART_STR(&fld_name);
3948: php_strtoupper(fld_name.c, fld_name.len);
3949:
3950: zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3951:
3952: my_smart_str_dtor(&fld_name);
3953: }
3954:
3955: memset(&fld_name, 0, sizeof(smart_str));
3956: memset(&fld_val, 0, sizeof(smart_str));
3957:
3958: token.c = (char *)ps;
3959: token.len = 0;
3960: token.a = 0;
3961:
3962: state = 1;
3963: }
3964: break;
3965:
3966: case 4:
3967: APPEND_ONE_CHAR(' ');
3968: state = 3;
3969: break;
3970: }
3971:
3972: if (crlf_state == 1) {
3973: APPEND_ONE_CHAR('\r');
3974: }
3975:
3976: APPEND_ONE_CHAR(*ps);
3977:
3978: crlf_state = 0;
3979: break;
3980: }
3981: ps++, icnt--;
3982: }
3983: out:
3984: if (state == 2) {
3985: token.c = "";
3986: token.len = 0;
3987: token.a = 0;
3988:
3989: state = 3;
3990: }
3991: if (state == 3) {
3992: fld_val = token;
3993:
3994: if (fld_name.c != NULL && fld_val.c != NULL) {
3995: void *dummy;
3996:
3997: /* FIXME: some locale free implementation is
3998: * really required here,,, */
3999: SEPARATE_SMART_STR(&fld_name);
4000: php_strtoupper(fld_name.c, fld_name.len);
4001:
4002: zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
4003:
4004: my_smart_str_dtor(&fld_name);
4005: }
4006: }
4007: return state;
4008: }
4009:
4010: PHP_FUNCTION(mb_send_mail)
4011: {
4012: int n;
4013: char *to = NULL;
4014: int to_len;
4015: char *message = NULL;
4016: int message_len;
4017: char *headers = NULL;
4018: int headers_len;
4019: char *subject = NULL;
4020: int subject_len;
4021: char *extra_cmd = NULL;
4022: int extra_cmd_len;
4023: int i;
4024: char *to_r = NULL;
4025: char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4026: struct {
4027: int cnt_type:1;
4028: int cnt_trans_enc:1;
4029: } suppressed_hdrs = { 0, 0 };
4030:
4031: char *message_buf = NULL, *subject_buf = NULL, *p;
4032: mbfl_string orig_str, conv_str;
4033: mbfl_string *pstr; /* pointer to mbfl string for return value */
4034: enum mbfl_no_encoding
1.1.1.2 ! misho 4035: tran_cs, /* transfar text charset */
! 4036: head_enc, /* header transfar encoding */
! 4037: body_enc; /* body transfar encoding */
1.1 misho 4038: mbfl_memory_device device; /* automatic allocateable buffer for additional header */
4039: const mbfl_language *lang;
4040: int err = 0;
4041: HashTable ht_headers;
4042: smart_str *s;
4043: extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4044: char *pp, *ee;
1.1.1.2 ! misho 4045:
1.1 misho 4046: /* initialize */
4047: mbfl_memory_device_init(&device, 0, 0);
4048: mbfl_string_init(&orig_str);
4049: mbfl_string_init(&conv_str);
4050:
4051: /* character-set, transfer-encoding */
4052: tran_cs = mbfl_no_encoding_utf8;
4053: head_enc = mbfl_no_encoding_base64;
4054: body_enc = mbfl_no_encoding_base64;
4055: lang = mbfl_no2language(MBSTRG(language));
4056: if (lang != NULL) {
4057: tran_cs = lang->mail_charset;
4058: head_enc = lang->mail_header_encoding;
4059: body_enc = lang->mail_body_encoding;
4060: }
4061:
4062: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
4063: return;
4064: }
4065:
4066: /* ASCIIZ check */
4067: MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4068: MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4069: MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4070: if (headers) {
4071: MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4072: }
4073: if (extra_cmd) {
4074: MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
4075: }
4076:
4077: zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
4078:
4079: if (headers != NULL) {
4080: _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4081: }
4082:
4083: if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
4084: char *tmp;
4085: char *param_name;
4086: char *charset = NULL;
4087:
4088: SEPARATE_SMART_STR(s);
4089: smart_str_0(s);
4090:
4091: p = strchr(s->c, ';');
4092:
4093: if (p != NULL) {
4094: /* skipping the padded spaces */
4095: do {
4096: ++p;
4097: } while (*p == ' ' || *p == '\t');
4098:
4099: if (*p != '\0') {
4100: if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4101: if (strcasecmp(param_name, "charset") == 0) {
4102: enum mbfl_no_encoding _tran_cs = tran_cs;
4103:
4104: charset = php_strtok_r(NULL, "= \"", &tmp);
4105: if (charset != NULL) {
4106: _tran_cs = mbfl_name2no_encoding(charset);
4107: }
4108:
4109: if (_tran_cs == mbfl_no_encoding_invalid) {
4110: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4111: _tran_cs = mbfl_no_encoding_ascii;
4112: }
4113: tran_cs = _tran_cs;
4114: }
4115: }
4116: }
4117: }
4118: suppressed_hdrs.cnt_type = 1;
4119: }
4120:
4121: if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
4122: enum mbfl_no_encoding _body_enc;
4123: SEPARATE_SMART_STR(s);
4124: smart_str_0(s);
4125:
4126: _body_enc = mbfl_name2no_encoding(s->c);
4127: switch (_body_enc) {
4128: case mbfl_no_encoding_base64:
4129: case mbfl_no_encoding_7bit:
4130: case mbfl_no_encoding_8bit:
4131: body_enc = _body_enc;
4132: break;
4133:
4134: default:
4135: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
4136: body_enc = mbfl_no_encoding_8bit;
4137: break;
4138: }
4139: suppressed_hdrs.cnt_trans_enc = 1;
4140: }
4141:
4142: /* To: */
4143: if (to != NULL) {
1.1.1.2 ! misho 4144: if (to_len > 0) {
! 4145: to_r = estrndup(to, to_len);
! 4146: for (; to_len; to_len--) {
! 4147: if (!isspace((unsigned char) to_r[to_len - 1])) {
! 4148: break;
! 4149: }
! 4150: to_r[to_len - 1] = '\0';
! 4151: }
! 4152: for (i = 0; to_r[i]; i++) {
1.1 misho 4153: if (iscntrl((unsigned char) to_r[i])) {
4154: /* According to RFC 822, section 3.1.1 long headers may be separated into
4155: * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4156: * To prevent these separators from being replaced with a space, we use the
4157: * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4158: */
4159: SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4160: to_r[i] = ' ';
4161: }
1.1.1.2 ! misho 4162: }
! 4163: } else {
! 4164: to_r = to;
! 4165: }
! 4166: } else {
1.1 misho 4167: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4168: err = 1;
4169: }
4170:
4171: /* Subject: */
4172: if (subject != NULL && subject_len >= 0) {
4173: orig_str.no_language = MBSTRG(language);
4174: orig_str.val = (unsigned char *)subject;
4175: orig_str.len = subject_len;
1.1.1.2 ! misho 4176: orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
! 4177: if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
! 4178: const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
! 4179: orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
1.1 misho 4180: }
4181: pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4182: if (pstr != NULL) {
4183: subject_buf = subject = (char *)pstr->val;
4184: }
4185: } else {
4186: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4187: err = 1;
4188: }
4189:
4190: /* message body */
4191: if (message != NULL) {
4192: orig_str.no_language = MBSTRG(language);
4193: orig_str.val = (unsigned char *)message;
4194: orig_str.len = (unsigned int)message_len;
1.1.1.2 ! misho 4195: orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 4196:
1.1.1.2 ! misho 4197: if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
! 4198: const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
! 4199: orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
1.1 misho 4200: }
4201:
4202: pstr = NULL;
4203: {
4204: mbfl_string tmpstr;
4205:
4206: if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4207: tmpstr.no_encoding=mbfl_no_encoding_8bit;
4208: pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4209: efree(tmpstr.val);
4210: }
4211: }
4212: if (pstr != NULL) {
4213: message_buf = message = (char *)pstr->val;
4214: }
4215: } else {
4216: /* this is not really an error, so it is allowed. */
4217: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4218: message = NULL;
4219: }
4220:
4221: /* other headers */
4222: #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4223: #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4224: #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4225: #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4226: if (headers != NULL) {
4227: p = headers;
4228: n = headers_len;
4229: mbfl_memory_device_strncat(&device, p, n);
4230: if (n > 0 && p[n - 1] != '\n') {
4231: mbfl_memory_device_strncat(&device, "\n", 1);
4232: }
4233: }
4234:
4235: if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4236: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4237: mbfl_memory_device_strncat(&device, "\n", 1);
4238: }
4239:
4240: if (!suppressed_hdrs.cnt_type) {
4241: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4242:
4243: p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4244: if (p != NULL) {
4245: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4246: mbfl_memory_device_strcat(&device, p);
4247: }
4248: mbfl_memory_device_strncat(&device, "\n", 1);
4249: }
4250: if (!suppressed_hdrs.cnt_trans_enc) {
4251: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4252: p = (char *)mbfl_no2preferred_mime_name(body_enc);
4253: if (p == NULL) {
4254: p = "7bit";
4255: }
4256: mbfl_memory_device_strcat(&device, p);
4257: mbfl_memory_device_strncat(&device, "\n", 1);
4258: }
4259:
4260: mbfl_memory_device_unput(&device);
4261: mbfl_memory_device_output('\0', &device);
4262: headers = (char *)device.buffer;
4263:
4264: if (force_extra_parameters) {
4265: extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4266: } else if (extra_cmd) {
4267: extra_cmd = php_escape_shell_cmd(extra_cmd);
4268: }
4269:
4270: if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4271: RETVAL_TRUE;
4272: } else {
4273: RETVAL_FALSE;
4274: }
4275:
4276: if (extra_cmd) {
4277: efree(extra_cmd);
4278: }
4279: if (to_r != to) {
4280: efree(to_r);
4281: }
4282: if (subject_buf) {
4283: efree((void *)subject_buf);
4284: }
4285: if (message_buf) {
4286: efree((void *)message_buf);
4287: }
4288: mbfl_memory_device_clear(&device);
4289: zend_hash_destroy(&ht_headers);
4290: }
4291:
4292: #undef SKIP_LONG_HEADER_SEP_MBSTRING
4293: #undef MAIL_ASCIIZ_CHECK_MBSTRING
4294: #undef APPEND_ONE_CHAR
4295: #undef SEPARATE_SMART_STR
4296: #undef PHP_MBSTR_MAIL_MIME_HEADER1
4297: #undef PHP_MBSTR_MAIL_MIME_HEADER2
4298: #undef PHP_MBSTR_MAIL_MIME_HEADER3
4299: #undef PHP_MBSTR_MAIL_MIME_HEADER4
4300: /* }}} */
4301:
4302: /* {{{ proto mixed mb_get_info([string type])
4303: Returns the current settings of mbstring */
4304: PHP_FUNCTION(mb_get_info)
4305: {
4306: char *typ = NULL;
1.1.1.2 ! misho 4307: int typ_len;
! 4308: size_t n;
1.1 misho 4309: char *name;
4310: const struct mb_overload_def *over_func;
4311: zval *row1, *row2;
4312: const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
1.1.1.2 ! misho 4313: const mbfl_encoding **entry;
1.1 misho 4314:
4315: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4316: RETURN_FALSE;
4317: }
4318:
4319: if (!typ || !strcasecmp("all", typ)) {
4320: array_init(return_value);
1.1.1.2 ! misho 4321: if (MBSTRG(current_internal_encoding)) {
! 4322: add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
1.1 misho 4323: }
1.1.1.2 ! misho 4324: if (MBSTRG(http_input_identify)) {
! 4325: add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
1.1 misho 4326: }
1.1.1.2 ! misho 4327: if (MBSTRG(current_http_output_encoding)) {
! 4328: add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
1.1 misho 4329: }
4330: if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4331: add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4332: }
4333: add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4334: if (MBSTRG(func_overload)){
4335: over_func = &(mb_ovld[0]);
4336: MAKE_STD_ZVAL(row1);
4337: array_init(row1);
4338: while (over_func->type > 0) {
4339: if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4340: add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4341: }
4342: over_func++;
4343: }
4344: add_assoc_zval(return_value, "func_overload_list", row1);
4345: } else {
4346: add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4347: }
4348: if (lang != NULL) {
4349: if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4350: add_assoc_string(return_value, "mail_charset", name, 1);
4351: }
4352: if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4353: add_assoc_string(return_value, "mail_header_encoding", name, 1);
4354: }
4355: if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4356: add_assoc_string(return_value, "mail_body_encoding", name, 1);
4357: }
4358: }
4359: add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4360: if (MBSTRG(encoding_translation)) {
4361: add_assoc_string(return_value, "encoding_translation", "On", 1);
4362: } else {
4363: add_assoc_string(return_value, "encoding_translation", "Off", 1);
4364: }
4365: if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4366: add_assoc_string(return_value, "language", name, 1);
4367: }
4368: n = MBSTRG(current_detect_order_list_size);
4369: entry = MBSTRG(current_detect_order_list);
1.1.1.2 ! misho 4370: if (n > 0) {
! 4371: size_t i;
1.1 misho 4372: MAKE_STD_ZVAL(row2);
4373: array_init(row2);
1.1.1.2 ! misho 4374: for (i = 0; i < n; i++) {
! 4375: add_next_index_string(row2, (*entry)->name, 1);
1.1 misho 4376: entry++;
4377: }
4378: add_assoc_zval(return_value, "detect_order", row2);
4379: }
4380: if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4381: add_assoc_string(return_value, "substitute_character", "none", 1);
4382: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4383: add_assoc_string(return_value, "substitute_character", "long", 1);
4384: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4385: add_assoc_string(return_value, "substitute_character", "entity", 1);
4386: } else {
4387: add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4388: }
4389: if (MBSTRG(strict_detection)) {
4390: add_assoc_string(return_value, "strict_detection", "On", 1);
4391: } else {
4392: add_assoc_string(return_value, "strict_detection", "Off", 1);
4393: }
4394: } else if (!strcasecmp("internal_encoding", typ)) {
1.1.1.2 ! misho 4395: if (MBSTRG(current_internal_encoding)) {
! 4396: RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
1.1 misho 4397: }
4398: } else if (!strcasecmp("http_input", typ)) {
1.1.1.2 ! misho 4399: if (MBSTRG(http_input_identify)) {
! 4400: RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
1.1 misho 4401: }
4402: } else if (!strcasecmp("http_output", typ)) {
1.1.1.2 ! misho 4403: if (MBSTRG(current_http_output_encoding)) {
! 4404: RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
1.1 misho 4405: }
4406: } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4407: if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4408: RETVAL_STRING(name, 1);
4409: }
4410: } else if (!strcasecmp("func_overload", typ)) {
4411: RETVAL_LONG(MBSTRG(func_overload));
4412: } else if (!strcasecmp("func_overload_list", typ)) {
4413: if (MBSTRG(func_overload)){
4414: over_func = &(mb_ovld[0]);
4415: array_init(return_value);
4416: while (over_func->type > 0) {
4417: if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4418: add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4419: }
4420: over_func++;
4421: }
4422: } else {
4423: RETVAL_STRING("no overload", 1);
4424: }
4425: } else if (!strcasecmp("mail_charset", typ)) {
4426: if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4427: RETVAL_STRING(name, 1);
4428: }
4429: } else if (!strcasecmp("mail_header_encoding", typ)) {
4430: if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4431: RETVAL_STRING(name, 1);
4432: }
4433: } else if (!strcasecmp("mail_body_encoding", typ)) {
4434: if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4435: RETVAL_STRING(name, 1);
4436: }
4437: } else if (!strcasecmp("illegal_chars", typ)) {
4438: RETVAL_LONG(MBSTRG(illegalchars));
4439: } else if (!strcasecmp("encoding_translation", typ)) {
4440: if (MBSTRG(encoding_translation)) {
4441: RETVAL_STRING("On", 1);
4442: } else {
4443: RETVAL_STRING("Off", 1);
4444: }
4445: } else if (!strcasecmp("language", typ)) {
4446: if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4447: RETVAL_STRING(name, 1);
4448: }
4449: } else if (!strcasecmp("detect_order", typ)) {
4450: n = MBSTRG(current_detect_order_list_size);
4451: entry = MBSTRG(current_detect_order_list);
1.1.1.2 ! misho 4452: if (n > 0) {
! 4453: size_t i;
1.1 misho 4454: array_init(return_value);
1.1.1.2 ! misho 4455: for (i = 0; i < n; i++) {
! 4456: add_next_index_string(return_value, (*entry)->name, 1);
1.1 misho 4457: entry++;
4458: }
4459: }
4460: } else if (!strcasecmp("substitute_character", typ)) {
4461: if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4462: RETVAL_STRING("none", 1);
4463: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4464: RETVAL_STRING("long", 1);
4465: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4466: RETVAL_STRING("entity", 1);
4467: } else {
4468: RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4469: }
4470: } else if (!strcasecmp("strict_detection", typ)) {
4471: if (MBSTRG(strict_detection)) {
4472: RETVAL_STRING("On", 1);
4473: } else {
4474: RETVAL_STRING("Off", 1);
4475: }
4476: } else {
4477: RETURN_FALSE;
4478: }
4479: }
4480: /* }}} */
4481:
4482: /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4483: Check if the string is valid for the specified encoding */
4484: PHP_FUNCTION(mb_check_encoding)
4485: {
4486: char *var = NULL;
4487: int var_len;
4488: char *enc = NULL;
4489: int enc_len;
4490: mbfl_buffer_converter *convd;
1.1.1.2 ! misho 4491: const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
1.1 misho 4492: mbfl_string string, result, *ret = NULL;
4493: long illegalchars = 0;
4494:
4495: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4496: RETURN_FALSE;
4497: }
4498:
4499: if (var == NULL) {
4500: RETURN_BOOL(MBSTRG(illegalchars) == 0);
4501: }
4502:
4503: if (enc != NULL) {
1.1.1.2 ! misho 4504: encoding = mbfl_name2encoding(enc);
! 4505: if (!encoding || encoding == &mbfl_encoding_pass) {
1.1 misho 4506: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4507: RETURN_FALSE;
4508: }
4509: }
1.1.1.2 ! misho 4510:
! 4511: convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
1.1 misho 4512: if (convd == NULL) {
4513: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4514: RETURN_FALSE;
4515: }
4516: mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4517: mbfl_buffer_converter_illegal_substchar(convd, 0);
1.1.1.2 ! misho 4518:
1.1 misho 4519: /* initialize string */
1.1.1.2 ! misho 4520: mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
1.1 misho 4521: mbfl_string_init(&result);
4522:
4523: string.val = (unsigned char *)var;
4524: string.len = var_len;
4525: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4526: illegalchars = mbfl_buffer_illegalchars(convd);
4527: mbfl_buffer_converter_delete(convd);
4528:
4529: RETVAL_FALSE;
4530: if (ret != NULL) {
4531: if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4532: RETVAL_TRUE;
4533: }
4534: mbfl_string_clear(&result);
4535: }
4536: }
4537: /* }}} */
4538:
1.1.1.2 ! misho 4539:
! 4540: /* {{{ php_mb_populate_current_detect_order_list */
! 4541: static void php_mb_populate_current_detect_order_list(TSRMLS_D)
! 4542: {
! 4543: const mbfl_encoding **entry = 0;
! 4544: size_t nentries;
! 4545:
! 4546: if (MBSTRG(current_detect_order_list)) {
! 4547: return;
! 4548: }
! 4549:
! 4550: if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
! 4551: nentries = MBSTRG(detect_order_list_size);
! 4552: entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
! 4553: memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
! 4554: } else {
! 4555: const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
! 4556: size_t i;
! 4557: nentries = MBSTRG(default_detect_order_list_size);
! 4558: entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
! 4559: for (i = 0; i < nentries; i++) {
! 4560: entry[i] = mbfl_no2encoding(src[i]);
! 4561: }
! 4562: }
! 4563: MBSTRG(current_detect_order_list) = entry;
! 4564: MBSTRG(current_detect_order_list_size) = nentries;
! 4565: }
! 4566:
! 4567: /* {{{ static int php_mb_encoding_translation() */
! 4568: static int php_mb_encoding_translation(TSRMLS_D)
1.1 misho 4569: {
4570: return MBSTRG(encoding_translation);
4571: }
4572: /* }}} */
4573:
4574: /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4575: MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4576: {
4577: if (enc != NULL) {
4578: if (enc->flag & MBFL_ENCTYPE_MBCS) {
4579: if (enc->mblen_table != NULL) {
4580: if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4581: }
4582: } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4583: return 2;
4584: } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4585: return 4;
4586: }
4587: }
4588: return 1;
4589: }
4590: /* }}} */
4591:
4592: /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4593: MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4594: {
1.1.1.2 ! misho 4595: return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
1.1 misho 4596: }
4597: /* }}} */
4598:
4599: /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4600: MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4601: {
4602: register const char *p = s;
4603: char *last=NULL;
4604:
4605: if (nbytes == (size_t)-1) {
4606: size_t nb = 0;
4607:
4608: while (*p != '\0') {
4609: if (nb == 0) {
4610: if ((unsigned char)*p == (unsigned char)c) {
4611: last = (char *)p;
4612: }
4613: nb = php_mb_mbchar_bytes_ex(p, enc);
4614: if (nb == 0) {
4615: return NULL; /* something is going wrong! */
4616: }
4617: }
4618: --nb;
4619: ++p;
4620: }
4621: } else {
4622: register size_t bcnt = nbytes;
4623: register size_t nbytes_char;
4624: while (bcnt > 0) {
4625: if ((unsigned char)*p == (unsigned char)c) {
4626: last = (char *)p;
4627: }
4628: nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4629: if (bcnt < nbytes_char) {
4630: return NULL;
4631: }
4632: p += nbytes_char;
4633: bcnt -= nbytes_char;
4634: }
4635: }
4636: return last;
4637: }
4638: /* }}} */
4639:
4640: /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4641: MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4642: {
1.1.1.2 ! misho 4643: return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
1.1 misho 4644: }
4645: /* }}} */
4646:
4647: /* {{{ MBSTRING_API int php_mb_stripos()
4648: */
4649: MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4650: {
4651: int n;
4652: mbfl_string haystack, needle;
4653: n = -1;
4654:
4655: mbfl_string_init(&haystack);
4656: mbfl_string_init(&needle);
4657: haystack.no_language = MBSTRG(language);
1.1.1.2 ! misho 4658: haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 4659: needle.no_language = MBSTRG(language);
1.1.1.2 ! misho 4660: needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
1.1 misho 4661:
4662: do {
4663: size_t len = 0;
4664: haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4665: haystack.len = len;
4666:
4667: if (!haystack.val) {
4668: break;
4669: }
4670:
4671: if (haystack.len <= 0) {
4672: break;
4673: }
4674:
4675: needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4676: needle.len = len;
4677:
4678: if (!needle.val) {
4679: break;
4680: }
4681:
4682: if (needle.len <= 0) {
4683: break;
4684: }
4685:
4686: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4687: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4688: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4689: break;
4690: }
4691:
4692: {
4693: int haystack_char_len = mbfl_strlen(&haystack);
4694:
4695: if (mode) {
4696: if ((offset > 0 && offset > haystack_char_len) ||
4697: (offset < 0 && -offset > haystack_char_len)) {
4698: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4699: break;
4700: }
4701: } else {
4702: if (offset < 0 || offset > haystack_char_len) {
4703: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4704: break;
4705: }
4706: }
4707: }
4708:
4709: n = mbfl_strpos(&haystack, &needle, offset, mode);
4710: } while(0);
4711:
4712: if (haystack.val) {
4713: efree(haystack.val);
4714: }
4715:
4716: if (needle.val) {
4717: efree(needle.val);
4718: }
4719:
4720: return n;
4721: }
4722: /* }}} */
4723:
1.1.1.2 ! misho 4724: static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
1.1 misho 4725: {
1.1.1.2 ! misho 4726: *list = (const zend_encoding **)MBSTRG(http_input_list);
! 4727: *list_size = MBSTRG(http_input_list_size);
1.1 misho 4728: }
4729: /* }}} */
4730:
1.1.1.2 ! misho 4731: static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
1.1 misho 4732: {
1.1.1.2 ! misho 4733: MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
1.1 misho 4734: }
4735: /* }}} */
4736:
4737: #endif /* HAVE_MBSTRING */
4738:
4739: /*
4740: * Local variables:
4741: * tab-width: 4
4742: * c-basic-offset: 4
4743: * End:
4744: * vim600: fdm=marker
4745: * vim: noet sw=4 ts=4
4746: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>