Annotation of embedaddon/php/ext/mbstring/mbstring.c, revision 1.1.1.1
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16: | Rui Hirokawa <hirokawa@php.net> |
17: +----------------------------------------------------------------------+
18: */
19:
20: /* $Id: mbstring.c 321634 2012-01-01 13:15:04Z felipe $ */
21:
22: /*
23: * PHP 4 Multibyte String module "mbstring"
24: *
25: * History:
26: * 2000.5.19 Release php-4.0RC2_jstring-1.0
27: * 2001.4.1 Release php4_jstring-1.0.91
28: * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29: * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30: */
31:
32: /*
33: * PHP3 Internationalization support program.
34: *
35: * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36: * All rights reserved.
37: *
38: * See README_PHP3-i18n-ja for more detail.
39: *
40: * Authors:
41: * Hironori Sato <satoh@jpnnet.com>
42: * Shigeru Kanemoto <sgk@happysize.co.jp>
43: * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44: * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45: */
46:
47: /* {{{ includes */
48: #ifdef HAVE_CONFIG_H
49: #include "config.h"
50: #endif
51:
52: #include "php.h"
53: #include "php_ini.h"
54: #include "php_variables.h"
55: #include "mbstring.h"
56: #include "ext/standard/php_string.h"
57: #include "ext/standard/php_mail.h"
58: #include "ext/standard/exec.h"
59: #include "ext/standard/php_smart_str.h"
60: #include "ext/standard/url.h"
61: #include "main/php_output.h"
62: #include "ext/standard/info.h"
63:
64: #include "libmbfl/mbfl/mbfl_allocators.h"
65:
66: #include "php_variables.h"
67: #include "php_globals.h"
68: #include "rfc1867.h"
69: #include "php_content_types.h"
70: #include "SAPI.h"
71: #include "php_unicode.h"
72: #include "TSRM.h"
73:
74: #include "mb_gpc.h"
75:
76: #if HAVE_MBREGEX
77: #include "php_mbregex.h"
78: #endif
79:
80: #ifdef ZEND_MULTIBYTE
81: #include "zend_multibyte.h"
82: #endif /* ZEND_MULTIBYTE */
83:
84: #if HAVE_ONIG
85: #include "php_onig_compat.h"
86: #include <oniguruma.h>
87: #undef UChar
88: #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
89: #include "ext/pcre/php_pcre.h"
90: #endif
91: /* }}} */
92:
93: #if HAVE_MBSTRING
94:
95: /* {{{ prototypes */
96: ZEND_DECLARE_MODULE_GLOBALS(mbstring)
97:
98: static PHP_GINIT_FUNCTION(mbstring);
99: static PHP_GSHUTDOWN_FUNCTION(mbstring);
100:
101: #ifdef ZEND_MULTIBYTE
102: static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
103: static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
104: static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
105: static int php_mb_set_zend_encoding(TSRMLS_D);
106: #endif
107: /* }}} */
108:
109: /* {{{ php_mb_default_identify_list */
110: typedef struct _php_mb_nls_ident_list {
111: enum mbfl_no_language lang;
112: const enum mbfl_no_encoding* list;
113: int list_size;
114: } php_mb_nls_ident_list;
115:
116: static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
117: mbfl_no_encoding_ascii,
118: mbfl_no_encoding_jis,
119: mbfl_no_encoding_utf8,
120: mbfl_no_encoding_euc_jp,
121: mbfl_no_encoding_sjis
122: };
123:
124: static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
125: mbfl_no_encoding_ascii,
126: mbfl_no_encoding_utf8,
127: mbfl_no_encoding_euc_cn,
128: mbfl_no_encoding_cp936
129: };
130:
131: static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
132: mbfl_no_encoding_ascii,
133: mbfl_no_encoding_utf8,
134: mbfl_no_encoding_euc_tw,
135: mbfl_no_encoding_big5
136: };
137:
138: static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
139: mbfl_no_encoding_ascii,
140: mbfl_no_encoding_utf8,
141: mbfl_no_encoding_euc_kr,
142: mbfl_no_encoding_uhc
143: };
144:
145: static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
146: mbfl_no_encoding_ascii,
147: mbfl_no_encoding_utf8,
148: mbfl_no_encoding_koi8r,
149: mbfl_no_encoding_cp1251,
150: mbfl_no_encoding_cp866
151: };
152:
153: static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
154: mbfl_no_encoding_ascii,
155: mbfl_no_encoding_utf8,
156: mbfl_no_encoding_armscii8
157: };
158:
159: static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
160: mbfl_no_encoding_ascii,
161: mbfl_no_encoding_utf8,
162: mbfl_no_encoding_cp1254,
163: mbfl_no_encoding_8859_9
164: };
165:
166: static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
167: mbfl_no_encoding_ascii,
168: mbfl_no_encoding_utf8,
169: mbfl_no_encoding_koi8u
170: };
171:
172: static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
173: mbfl_no_encoding_ascii,
174: mbfl_no_encoding_utf8
175: };
176:
177:
178: static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179: { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180: { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181: { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182: { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183: { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184: { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185: { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186: { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187: { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188: };
189:
190: /* }}} */
191:
192: /* {{{ mb_overload_def mb_ovld[] */
193: static const struct mb_overload_def mb_ovld[] = {
194: {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
195: {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
196: {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
197: {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
198: {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
199: {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
200: {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
201: {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
202: {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
203: {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
204: {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
205: {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
206: {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
207: #if HAVE_MBREGEX
208: {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
209: {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
210: {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
211: {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
212: {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
213: #endif
214: {0, NULL, NULL, NULL}
215: };
216: /* }}} */
217:
218: /* {{{ arginfo */
219: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
220: ZEND_ARG_INFO(0, language)
221: ZEND_END_ARG_INFO()
222:
223: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
224: ZEND_ARG_INFO(0, encoding)
225: ZEND_END_ARG_INFO()
226:
227: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
228: ZEND_ARG_INFO(0, type)
229: ZEND_END_ARG_INFO()
230:
231: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
232: ZEND_ARG_INFO(0, encoding)
233: ZEND_END_ARG_INFO()
234:
235: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
236: ZEND_ARG_INFO(0, encoding)
237: ZEND_END_ARG_INFO()
238:
239: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
240: ZEND_ARG_INFO(0, substchar)
241: ZEND_END_ARG_INFO()
242:
243: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
244: ZEND_ARG_INFO(0, encoding)
245: ZEND_END_ARG_INFO()
246:
247: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
248: ZEND_ARG_INFO(0, encoded_string)
249: ZEND_ARG_INFO(1, result)
250: ZEND_END_ARG_INFO()
251:
252: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
253: ZEND_ARG_INFO(0, contents)
254: ZEND_ARG_INFO(0, status)
255: ZEND_END_ARG_INFO()
256:
257: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
258: ZEND_ARG_INFO(0, str)
259: ZEND_ARG_INFO(0, encoding)
260: ZEND_END_ARG_INFO()
261:
262: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
263: ZEND_ARG_INFO(0, haystack)
264: ZEND_ARG_INFO(0, needle)
265: ZEND_ARG_INFO(0, offset)
266: ZEND_ARG_INFO(0, encoding)
267: ZEND_END_ARG_INFO()
268:
269: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
270: ZEND_ARG_INFO(0, haystack)
271: ZEND_ARG_INFO(0, needle)
272: ZEND_ARG_INFO(0, offset)
273: ZEND_ARG_INFO(0, encoding)
274: ZEND_END_ARG_INFO()
275:
276: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
277: ZEND_ARG_INFO(0, haystack)
278: ZEND_ARG_INFO(0, needle)
279: ZEND_ARG_INFO(0, offset)
280: ZEND_ARG_INFO(0, encoding)
281: ZEND_END_ARG_INFO()
282:
283: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
284: ZEND_ARG_INFO(0, haystack)
285: ZEND_ARG_INFO(0, needle)
286: ZEND_ARG_INFO(0, offset)
287: ZEND_ARG_INFO(0, encoding)
288: ZEND_END_ARG_INFO()
289:
290: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
291: ZEND_ARG_INFO(0, haystack)
292: ZEND_ARG_INFO(0, needle)
293: ZEND_ARG_INFO(0, part)
294: ZEND_ARG_INFO(0, encoding)
295: ZEND_END_ARG_INFO()
296:
297: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
298: ZEND_ARG_INFO(0, haystack)
299: ZEND_ARG_INFO(0, needle)
300: ZEND_ARG_INFO(0, part)
301: ZEND_ARG_INFO(0, encoding)
302: ZEND_END_ARG_INFO()
303:
304: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
305: ZEND_ARG_INFO(0, haystack)
306: ZEND_ARG_INFO(0, needle)
307: ZEND_ARG_INFO(0, part)
308: ZEND_ARG_INFO(0, encoding)
309: ZEND_END_ARG_INFO()
310:
311: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
312: ZEND_ARG_INFO(0, haystack)
313: ZEND_ARG_INFO(0, needle)
314: ZEND_ARG_INFO(0, part)
315: ZEND_ARG_INFO(0, encoding)
316: ZEND_END_ARG_INFO()
317:
318: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
319: ZEND_ARG_INFO(0, haystack)
320: ZEND_ARG_INFO(0, needle)
321: ZEND_ARG_INFO(0, encoding)
322: ZEND_END_ARG_INFO()
323:
324: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
325: ZEND_ARG_INFO(0, str)
326: ZEND_ARG_INFO(0, start)
327: ZEND_ARG_INFO(0, length)
328: ZEND_ARG_INFO(0, encoding)
329: ZEND_END_ARG_INFO()
330:
331: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
332: ZEND_ARG_INFO(0, str)
333: ZEND_ARG_INFO(0, start)
334: ZEND_ARG_INFO(0, length)
335: ZEND_ARG_INFO(0, encoding)
336: ZEND_END_ARG_INFO()
337:
338: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
339: ZEND_ARG_INFO(0, str)
340: ZEND_ARG_INFO(0, encoding)
341: ZEND_END_ARG_INFO()
342:
343: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
344: ZEND_ARG_INFO(0, str)
345: ZEND_ARG_INFO(0, start)
346: ZEND_ARG_INFO(0, width)
347: ZEND_ARG_INFO(0, trimmarker)
348: ZEND_ARG_INFO(0, encoding)
349: ZEND_END_ARG_INFO()
350:
351: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
352: ZEND_ARG_INFO(0, str)
353: ZEND_ARG_INFO(0, to)
354: ZEND_ARG_INFO(0, from)
355: ZEND_END_ARG_INFO()
356:
357: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
358: ZEND_ARG_INFO(0, sourcestring)
359: ZEND_ARG_INFO(0, mode)
360: ZEND_ARG_INFO(0, encoding)
361: ZEND_END_ARG_INFO()
362:
363: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
364: ZEND_ARG_INFO(0, sourcestring)
365: ZEND_ARG_INFO(0, encoding)
366: ZEND_END_ARG_INFO()
367:
368: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
369: ZEND_ARG_INFO(0, sourcestring)
370: ZEND_ARG_INFO(0, encoding)
371: ZEND_END_ARG_INFO()
372:
373: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
374: ZEND_ARG_INFO(0, str)
375: ZEND_ARG_INFO(0, encoding_list)
376: ZEND_ARG_INFO(0, strict)
377: ZEND_END_ARG_INFO()
378:
379: ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
380: ZEND_END_ARG_INFO()
381:
382: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
383: ZEND_ARG_INFO(0, encoding)
384: ZEND_END_ARG_INFO()
385:
386: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
387: ZEND_ARG_INFO(0, str)
388: ZEND_ARG_INFO(0, charset)
389: ZEND_ARG_INFO(0, transfer)
390: ZEND_ARG_INFO(0, linefeed)
391: ZEND_ARG_INFO(0, indent)
392: ZEND_END_ARG_INFO()
393:
394: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
395: ZEND_ARG_INFO(0, string)
396: ZEND_END_ARG_INFO()
397:
398: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
399: ZEND_ARG_INFO(0, str)
400: ZEND_ARG_INFO(0, option)
401: ZEND_ARG_INFO(0, encoding)
402: ZEND_END_ARG_INFO()
403:
404: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
405: ZEND_ARG_INFO(0, to)
406: ZEND_ARG_INFO(0, from)
407: ZEND_ARG_INFO(1, ...)
408: ZEND_END_ARG_INFO()
409:
410: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
411: ZEND_ARG_INFO(0, string)
412: ZEND_ARG_INFO(0, convmap)
413: ZEND_ARG_INFO(0, encoding)
414: ZEND_END_ARG_INFO()
415:
416: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
417: ZEND_ARG_INFO(0, string)
418: ZEND_ARG_INFO(0, convmap)
419: ZEND_ARG_INFO(0, encoding)
420: ZEND_END_ARG_INFO()
421:
422: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
423: ZEND_ARG_INFO(0, to)
424: ZEND_ARG_INFO(0, subject)
425: ZEND_ARG_INFO(0, message)
426: ZEND_ARG_INFO(0, additional_headers)
427: ZEND_ARG_INFO(0, additional_parameters)
428: ZEND_END_ARG_INFO()
429:
430: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
431: ZEND_ARG_INFO(0, type)
432: ZEND_END_ARG_INFO()
433:
434: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
435: ZEND_ARG_INFO(0, var)
436: ZEND_ARG_INFO(0, encoding)
437: ZEND_END_ARG_INFO()
438:
439: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
440: ZEND_ARG_INFO(0, encoding)
441: ZEND_END_ARG_INFO()
442:
443: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
444: ZEND_ARG_INFO(0, pattern)
445: ZEND_ARG_INFO(0, string)
446: ZEND_ARG_INFO(1, registers)
447: ZEND_END_ARG_INFO()
448:
449: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
450: ZEND_ARG_INFO(0, pattern)
451: ZEND_ARG_INFO(0, string)
452: ZEND_ARG_INFO(1, registers)
453: ZEND_END_ARG_INFO()
454:
455: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
456: ZEND_ARG_INFO(0, pattern)
457: ZEND_ARG_INFO(0, replacement)
458: ZEND_ARG_INFO(0, string)
459: ZEND_ARG_INFO(0, option)
460: ZEND_END_ARG_INFO()
461:
462: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
463: ZEND_ARG_INFO(0, pattern)
464: ZEND_ARG_INFO(0, replacement)
465: ZEND_ARG_INFO(0, string)
466: ZEND_END_ARG_INFO()
467:
468: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
469: ZEND_ARG_INFO(0, pattern)
470: ZEND_ARG_INFO(0, string)
471: ZEND_ARG_INFO(0, limit)
472: ZEND_END_ARG_INFO()
473:
474: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
475: ZEND_ARG_INFO(0, pattern)
476: ZEND_ARG_INFO(0, string)
477: ZEND_ARG_INFO(0, option)
478: ZEND_END_ARG_INFO()
479:
480: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
481: ZEND_ARG_INFO(0, pattern)
482: ZEND_ARG_INFO(0, option)
483: ZEND_END_ARG_INFO()
484:
485: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
486: ZEND_ARG_INFO(0, pattern)
487: ZEND_ARG_INFO(0, option)
488: ZEND_END_ARG_INFO()
489:
490: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
491: ZEND_ARG_INFO(0, pattern)
492: ZEND_ARG_INFO(0, option)
493: ZEND_END_ARG_INFO()
494:
495: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
496: ZEND_ARG_INFO(0, string)
497: ZEND_ARG_INFO(0, pattern)
498: ZEND_ARG_INFO(0, option)
499: ZEND_END_ARG_INFO()
500:
501: ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
502: ZEND_END_ARG_INFO()
503:
504: ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
505: ZEND_END_ARG_INFO()
506:
507: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
508: ZEND_ARG_INFO(0, position)
509: ZEND_END_ARG_INFO()
510:
511: ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
512: ZEND_ARG_INFO(0, options)
513: ZEND_END_ARG_INFO()
514: /* }}} */
515:
516: /* {{{ zend_function_entry mbstring_functions[] */
517: const zend_function_entry mbstring_functions[] = {
518: PHP_FE(mb_convert_case, arginfo_mb_convert_case)
519: PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
520: PHP_FE(mb_strtolower, arginfo_mb_strtolower)
521: PHP_FE(mb_language, arginfo_mb_language)
522: PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
523: PHP_FE(mb_http_input, arginfo_mb_http_input)
524: PHP_FE(mb_http_output, arginfo_mb_http_output)
525: PHP_FE(mb_detect_order, arginfo_mb_detect_order)
526: PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
527: PHP_FE(mb_parse_str, arginfo_mb_parse_str)
528: PHP_FE(mb_output_handler, arginfo_mb_output_handler)
529: PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
530: PHP_FE(mb_strlen, arginfo_mb_strlen)
531: PHP_FE(mb_strpos, arginfo_mb_strpos)
532: PHP_FE(mb_strrpos, arginfo_mb_strrpos)
533: PHP_FE(mb_stripos, arginfo_mb_stripos)
534: PHP_FE(mb_strripos, arginfo_mb_strripos)
535: PHP_FE(mb_strstr, arginfo_mb_strstr)
536: PHP_FE(mb_strrchr, arginfo_mb_strrchr)
537: PHP_FE(mb_stristr, arginfo_mb_stristr)
538: PHP_FE(mb_strrichr, arginfo_mb_strrichr)
539: PHP_FE(mb_substr_count, arginfo_mb_substr_count)
540: PHP_FE(mb_substr, arginfo_mb_substr)
541: PHP_FE(mb_strcut, arginfo_mb_strcut)
542: PHP_FE(mb_strwidth, arginfo_mb_strwidth)
543: PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
544: PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
545: PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
546: PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
547: PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
548: PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
549: PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
550: PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
551: PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
552: PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
553: PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
554: PHP_FE(mb_send_mail, arginfo_mb_send_mail)
555: PHP_FE(mb_get_info, arginfo_mb_get_info)
556: PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
557: #if HAVE_MBREGEX
558: PHP_MBREGEX_FUNCTION_ENTRIES
559: #endif
560: PHP_FE_END
561: };
562: /* }}} */
563:
564: /* {{{ zend_module_entry mbstring_module_entry */
565: zend_module_entry mbstring_module_entry = {
566: STANDARD_MODULE_HEADER,
567: "mbstring",
568: mbstring_functions,
569: PHP_MINIT(mbstring),
570: PHP_MSHUTDOWN(mbstring),
571: PHP_RINIT(mbstring),
572: PHP_RSHUTDOWN(mbstring),
573: PHP_MINFO(mbstring),
574: NO_VERSION_YET,
575: PHP_MODULE_GLOBALS(mbstring),
576: PHP_GINIT(mbstring),
577: PHP_GSHUTDOWN(mbstring),
578: NULL,
579: STANDARD_MODULE_PROPERTIES_EX
580: };
581: /* }}} */
582:
583: /* {{{ static sapi_post_entry php_post_entries[] */
584: static sapi_post_entry php_post_entries[] = {
585: { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
586: { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
587: { NULL, 0, NULL, NULL }
588: };
589: /* }}} */
590:
/* Emitted only when COMPILE_DL_MBSTRING is defined. */
#ifdef COMPILE_DL_MBSTRING
ZEND_GET_MODULE(mbstring)
#endif
594:
595: /* {{{ allocators */
596: static void *_php_mb_allocators_malloc(unsigned int sz)
597: {
598: return emalloc(sz);
599: }
600:
601: static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
602: {
603: return erealloc(ptr, sz);
604: }
605:
606: static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
607: {
608: return ecalloc(nelems, szelem);
609: }
610:
611: static void _php_mb_allocators_free(void *ptr)
612: {
613: efree(ptr);
614: }
615:
616: static void *_php_mb_allocators_pmalloc(unsigned int sz)
617: {
618: return pemalloc(sz, 1);
619: }
620:
621: static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
622: {
623: return perealloc(ptr, sz, 1);
624: }
625:
626: static void _php_mb_allocators_pfree(void *ptr)
627: {
628: pefree(ptr, 1);
629: }
630:
631: static mbfl_allocators _php_mb_allocators = {
632: _php_mb_allocators_malloc,
633: _php_mb_allocators_realloc,
634: _php_mb_allocators_calloc,
635: _php_mb_allocators_free,
636: _php_mb_allocators_pmalloc,
637: _php_mb_allocators_prealloc,
638: _php_mb_allocators_pfree
639: };
640: /* }}} */
641:
642: /* {{{ static sapi_post_entry mbstr_post_entries[] */
643: static sapi_post_entry mbstr_post_entries[] = {
644: { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
645: { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
646: { NULL, 0, NULL, NULL }
647: };
648: /* }}} */
649:
650: /* {{{ static int php_mb_parse_encoding_list()
651: * Return 0 if input contains any illegal encoding, otherwise 1.
652: * Even if any illegal encoding is detected the result may contain a list
653: * of parsed encodings.
654: */
655: static int
656: php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
657: {
658: int n, l, size, bauto, ret = 1;
659: char *p, *p1, *p2, *endp, *tmpstr;
660: enum mbfl_no_encoding no_encoding;
661: enum mbfl_no_encoding *src, *entry, *list;
662:
663: list = NULL;
664: if (value == NULL || value_length <= 0) {
665: if (return_list) {
666: *return_list = NULL;
667: }
668: if (return_size) {
669: *return_size = 0;
670: }
671: return 0;
672: } else {
673: enum mbfl_no_encoding *identify_list;
674: int identify_list_size;
675:
676: identify_list = MBSTRG(default_detect_order_list);
677: identify_list_size = MBSTRG(default_detect_order_list_size);
678:
679: /* copy the value string for work */
680: if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
681: tmpstr = (char *)estrndup(value+1, value_length-2);
682: value_length -= 2;
683: }
684: else
685: tmpstr = (char *)estrndup(value, value_length);
686: if (tmpstr == NULL) {
687: return 0;
688: }
689: /* count the number of listed encoding names */
690: endp = tmpstr + value_length;
691: n = 1;
692: p1 = tmpstr;
693: while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
694: p1 = p2 + 1;
695: n++;
696: }
697: size = n + identify_list_size;
698: /* make list */
699: list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
700: if (list != NULL) {
701: entry = list;
702: n = 0;
703: bauto = 0;
704: p1 = tmpstr;
705: do {
706: p2 = p = php_memnstr(p1, ",", 1, endp);
707: if (p == NULL) {
708: p = endp;
709: }
710: *p = '\0';
711: /* trim spaces */
712: while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
713: p1++;
714: }
715: p--;
716: while (p > p1 && (*p == ' ' || *p == '\t')) {
717: *p = '\0';
718: p--;
719: }
720: /* convert to the encoding number and check encoding */
721: if (strcasecmp(p1, "auto") == 0) {
722: if (!bauto) {
723: bauto = 1;
724: l = identify_list_size;
725: src = identify_list;
726: while (l > 0) {
727: *entry++ = *src++;
728: l--;
729: n++;
730: }
731: }
732: } else {
733: no_encoding = mbfl_name2no_encoding(p1);
734: if (no_encoding != mbfl_no_encoding_invalid) {
735: *entry++ = no_encoding;
736: n++;
737: } else {
738: ret = 0;
739: }
740: }
741: p1 = p2 + 1;
742: } while (n < size && p2 != NULL);
743: if (n > 0) {
744: if (return_list) {
745: *return_list = list;
746: } else {
747: pefree(list, persistent);
748: }
749: } else {
750: pefree(list, persistent);
751: if (return_list) {
752: *return_list = NULL;
753: }
754: ret = 0;
755: }
756: if (return_size) {
757: *return_size = n;
758: }
759: } else {
760: if (return_list) {
761: *return_list = NULL;
762: }
763: if (return_size) {
764: *return_size = 0;
765: }
766: ret = 0;
767: }
768: efree(tmpstr);
769: }
770:
771: return ret;
772: }
773: /* }}} */
774:
775: /* {{{ MBSTRING_API php_mb_check_encoding_list */
776: MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
777: return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
778: }
779: /* }}} */
780:
781: /* {{{ static int php_mb_parse_encoding_array()
782: * Return 0 if input contains any illegal encoding, otherwise 1.
783: * Even if any illegal encoding is detected the result may contain a list
784: * of parsed encodings.
785: */
786: static int
787: php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
788: {
789: zval **hash_entry;
790: HashTable *target_hash;
791: int i, n, l, size, bauto,ret = 1;
792: enum mbfl_no_encoding no_encoding;
793: enum mbfl_no_encoding *src, *list, *entry;
794:
795: list = NULL;
796: if (Z_TYPE_P(array) == IS_ARRAY) {
797: enum mbfl_no_encoding *identify_list;
798: int identify_list_size;
799:
800: identify_list = MBSTRG(default_detect_order_list);
801: identify_list_size = MBSTRG(default_detect_order_list_size);
802:
803: target_hash = Z_ARRVAL_P(array);
804: zend_hash_internal_pointer_reset(target_hash);
805: i = zend_hash_num_elements(target_hash);
806: size = i + identify_list_size;
807: list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
808: if (list != NULL) {
809: entry = list;
810: bauto = 0;
811: n = 0;
812: while (i > 0) {
813: if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
814: break;
815: }
816: convert_to_string_ex(hash_entry);
817: if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
818: if (!bauto) {
819: bauto = 1;
820: l = identify_list_size;
821: src = identify_list;
822: while (l > 0) {
823: *entry++ = *src++;
824: l--;
825: n++;
826: }
827: }
828: } else {
829: no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
830: if (no_encoding != mbfl_no_encoding_invalid) {
831: *entry++ = no_encoding;
832: n++;
833: } else {
834: ret = 0;
835: }
836: }
837: zend_hash_move_forward(target_hash);
838: i--;
839: }
840: if (n > 0) {
841: if (return_list) {
842: *return_list = list;
843: } else {
844: pefree(list, persistent);
845: }
846: } else {
847: pefree(list, persistent);
848: if (return_list) {
849: *return_list = NULL;
850: }
851: ret = 0;
852: }
853: if (return_size) {
854: *return_size = n;
855: }
856: } else {
857: if (return_list) {
858: *return_list = NULL;
859: }
860: if (return_size) {
861: *return_size = 0;
862: }
863: ret = 0;
864: }
865: }
866:
867: return ret;
868: }
869: /* }}} */
870:
871: static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
872: static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
873: static void _php_mb_free_regex(void *opaque);
874:
875: #if HAVE_ONIG
876: /* {{{ _php_mb_compile_regex */
877: static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
878: {
879: php_mb_regex_t *retval;
880: OnigErrorInfo err_info;
881: int err_code;
882:
883: if ((err_code = onig_new(&retval,
884: (const OnigUChar *)pattern,
885: (const OnigUChar *)pattern + strlen(pattern),
886: ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
887: ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
888: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
889: onig_error_code_to_str(err_str, err_code, err_info);
890: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
891: retval = NULL;
892: }
893: return retval;
894: }
895: /* }}} */
896:
897: /* {{{ _php_mb_match_regex */
898: static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
899: {
900: return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
901: (const OnigUChar*)str + str_len, (const OnigUChar *)str,
902: (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
903: }
904: /* }}} */
905:
906: /* {{{ _php_mb_free_regex */
907: static void _php_mb_free_regex(void *opaque)
908: {
909: onig_free((php_mb_regex_t *)opaque);
910: }
911: /* }}} */
912: #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
913: /* {{{ _php_mb_compile_regex */
914: static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
915: {
916: pcre *retval;
917: const char *err_str;
918: int err_offset;
919:
920: if (!(retval = pcre_compile(pattern,
921: PCRE_CASELESS, &err_str, &err_offset, NULL))) {
922: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
923: }
924: return retval;
925: }
926: /* }}} */
927:
928: /* {{{ _php_mb_match_regex */
929: static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
930: {
931: return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
932: 0, NULL, 0) >= 0;
933: }
934: /* }}} */
935:
936: /* {{{ _php_mb_free_regex */
/* Release a pattern obtained from _php_mb_compile_regex() (PCRE build). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
941: /* }}} */
942: #endif
943:
944: /* {{{ php_mb_nls_get_default_detect_order_list */
945: static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
946: {
947: size_t i;
948:
949: *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
950: *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
951:
952: for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
953: if (php_mb_default_identify_list[i].lang == lang) {
954: *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
955: *plist_size = php_mb_default_identify_list[i].list_size;
956: return 1;
957: }
958: }
959: return 0;
960: }
961: /* }}} */
962:
963: /* {{{ php.ini directive handler */
964: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
965: static PHP_INI_MH(OnUpdate_mbstring_language)
966: {
967: enum mbfl_no_language no_language;
968:
969: no_language = mbfl_name2no_language(new_value);
970: if (no_language == mbfl_no_language_invalid) {
971: MBSTRG(language) = mbfl_no_language_neutral;
972: return FAILURE;
973: }
974: MBSTRG(language) = no_language;
975: php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
976: return SUCCESS;
977: }
978: /* }}} */
979:
980: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
981: static PHP_INI_MH(OnUpdate_mbstring_detect_order)
982: {
983: enum mbfl_no_encoding *list;
984: int size;
985:
986: if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
987: if (MBSTRG(detect_order_list)) {
988: free(MBSTRG(detect_order_list));
989: }
990: MBSTRG(detect_order_list) = list;
991: MBSTRG(detect_order_list_size) = size;
992: } else {
993: if (MBSTRG(detect_order_list)) {
994: free(MBSTRG(detect_order_list));
995: MBSTRG(detect_order_list) = NULL;
996: }
997: return FAILURE;
998: }
999:
1000: return SUCCESS;
1001: }
1002: /* }}} */
1003:
1004: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1005: static PHP_INI_MH(OnUpdate_mbstring_http_input)
1006: {
1007: enum mbfl_no_encoding *list;
1008: int size;
1009:
1010: if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1011: if (MBSTRG(http_input_list)) {
1012: free(MBSTRG(http_input_list));
1013: }
1014: MBSTRG(http_input_list) = list;
1015: MBSTRG(http_input_list_size) = size;
1016: } else {
1017: if (MBSTRG(http_input_list)) {
1018: free(MBSTRG(http_input_list));
1019: MBSTRG(http_input_list) = NULL;
1020: }
1021: MBSTRG(http_input_list_size) = 0;
1022: return FAILURE;
1023: }
1024:
1025: return SUCCESS;
1026: }
1027: /* }}} */
1028:
1029: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1030: static PHP_INI_MH(OnUpdate_mbstring_http_output)
1031: {
1032: enum mbfl_no_encoding no_encoding;
1033:
1034: no_encoding = mbfl_name2no_encoding(new_value);
1035: if (no_encoding != mbfl_no_encoding_invalid) {
1036: MBSTRG(http_output_encoding) = no_encoding;
1037: MBSTRG(current_http_output_encoding) = no_encoding;
1038: } else {
1039: MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
1040: MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
1041: if (new_value != NULL && new_value_length > 0) {
1042: return FAILURE;
1043: }
1044: }
1045:
1046: return SUCCESS;
1047: }
1048: /* }}} */
1049:
1050: /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
1051: int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1052: {
1053: enum mbfl_no_encoding no_encoding;
1054: const char *enc_name = NULL;
1055: uint enc_name_len = 0;
1056:
1057: no_encoding = new_value ? mbfl_name2no_encoding(new_value):
1058: mbfl_no_encoding_invalid;
1059: if (no_encoding != mbfl_no_encoding_invalid) {
1060: enc_name = new_value;
1061: enc_name_len = new_value_length;
1062: } else {
1063: switch (MBSTRG(language)) {
1064: case mbfl_no_language_uni:
1065: enc_name = "UTF-8";
1066: enc_name_len = sizeof("UTF-8") - 1;
1067: break;
1068: case mbfl_no_language_japanese:
1069: enc_name = "EUC-JP";
1070: enc_name_len = sizeof("EUC-JP") - 1;
1071: break;
1072: case mbfl_no_language_korean:
1073: enc_name = "EUC-KR";
1074: enc_name_len = sizeof("EUC-KR") - 1;
1075: break;
1076: case mbfl_no_language_simplified_chinese:
1077: enc_name = "EUC-CN";
1078: enc_name_len = sizeof("EUC-CN") - 1;
1079: break;
1080: case mbfl_no_language_traditional_chinese:
1081: enc_name = "EUC-TW";
1082: enc_name_len = sizeof("EUC-TW") - 1;
1083: break;
1084: case mbfl_no_language_russian:
1085: enc_name = "KOI8-R";
1086: enc_name_len = sizeof("KOI8-R") - 1;
1087: break;
1088: case mbfl_no_language_german:
1089: enc_name = "ISO-8859-15";
1090: enc_name_len = sizeof("ISO-8859-15") - 1;
1091: break;
1092: case mbfl_no_language_armenian:
1093: enc_name = "ArmSCII-8";
1094: enc_name_len = sizeof("ArmSCII-8") - 1;
1095: break;
1096: case mbfl_no_language_turkish:
1097: enc_name = "ISO-8859-9";
1098: enc_name_len = sizeof("ISO-8859-9") - 1;
1099: break;
1100: default:
1101: enc_name = "ISO-8859-1";
1102: enc_name_len = sizeof("ISO-8859-1") - 1;
1103: break;
1104: }
1105: no_encoding = mbfl_name2no_encoding(enc_name);
1106: }
1107: MBSTRG(internal_encoding) = no_encoding;
1108: MBSTRG(current_internal_encoding) = no_encoding;
1109: #if HAVE_MBREGEX
1110: {
1111: const char *enc_name = new_value;
1112: if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1113: /* falls back to EUC-JP if an unknown encoding name is given */
1114: enc_name = "EUC-JP";
1115: php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1116: }
1117: php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1118: }
1119: #endif
1120: return SUCCESS;
1121: }
1122: /* }}} */
1123:
1124: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1125: static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1126: {
1127: if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1128: || stage == PHP_INI_STAGE_RUNTIME) {
1129: return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1130: } else {
1131: /* the corresponding mbstring globals needs to be set according to the
1132: * ini value in the later stage because it never falls back to the
1133: * default value if 1. no value for mbstring.internal_encoding is given,
1134: * 2. mbstring.language directive is processed in per-dir or runtime
1135: * context and 3. call to the handler for mbstring.language is done
1136: * after mbstring.internal_encoding is handled. */
1137: return SUCCESS;
1138: }
1139: }
1140: /* }}} */
1141:
1142: #ifdef ZEND_MULTIBYTE
1143: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
1144: static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
1145: {
1146: int *list, size;
1147:
1148: if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1149: if (MBSTRG(script_encoding_list) != NULL) {
1150: free(MBSTRG(script_encoding_list));
1151: }
1152: MBSTRG(script_encoding_list) = list;
1153: MBSTRG(script_encoding_list_size) = size;
1154: } else {
1155: if (MBSTRG(script_encoding_list) != NULL) {
1156: free(MBSTRG(script_encoding_list));
1157: }
1158: MBSTRG(script_encoding_list) = NULL;
1159: MBSTRG(script_encoding_list_size) = 0;
1160: return FAILURE;
1161: }
1162:
1163: return SUCCESS;
1164: }
1165: /* }}} */
1166: #endif /* ZEND_MULTIBYTE */
1167:
1168: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1169: static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1170: {
1171: int c;
1172: char *endptr = NULL;
1173:
1174: if (new_value != NULL) {
1175: if (strcasecmp("none", new_value) == 0) {
1176: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1177: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1178: } else if (strcasecmp("long", new_value) == 0) {
1179: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1180: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1181: } else if (strcasecmp("entity", new_value) == 0) {
1182: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1183: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1184: } else {
1185: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1186: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1187: if (new_value_length >0) {
1188: c = strtol(new_value, &endptr, 0);
1189: if (*endptr == '\0') {
1190: MBSTRG(filter_illegal_substchar) = c;
1191: MBSTRG(current_filter_illegal_substchar) = c;
1192: }
1193: }
1194: }
1195: } else {
1196: MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1197: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1198: MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1199: MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1200: }
1201:
1202: return SUCCESS;
1203: }
1204: /* }}} */
1205:
1206: /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1207: static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1208: {
1209: if (new_value == NULL) {
1210: return FAILURE;
1211: }
1212:
1213: OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1214:
1215: if (MBSTRG(encoding_translation)) {
1216: sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1217: sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1218: } else {
1219: sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1220: sapi_register_post_entries(php_post_entries TSRMLS_CC);
1221: }
1222:
1223: return SUCCESS;
1224: }
1225: /* }}} */
1226:
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
1228: static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1229: {
1230: zval tmp;
1231: void *re = NULL;
1232:
1233: if (!new_value) {
1234: new_value = entry->orig_value;
1235: new_value_length = entry->orig_value_length;
1236: }
1237: php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1238:
1239: if (Z_STRLEN(tmp) > 0) {
1240: if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1241: zval_dtor(&tmp);
1242: return FAILURE;
1243: }
1244: }
1245:
1246: if (MBSTRG(http_output_conv_mimetypes)) {
1247: _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1248: }
1249:
1250: MBSTRG(http_output_conv_mimetypes) = re;
1251:
1252: zval_dtor(&tmp);
1253: return SUCCESS;
1254: }
1255: /* }}} */
1256: /* }}} */
1257:
1258: /* {{{ php.ini directive registration */
/*
 * mbstring php.ini directives. Each entry names the directive, its default
 * value, the contexts in which it may be changed, and the update handler
 * defined earlier in this file (or a stock OnUpdate* handler).
 */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
	PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
#ifdef ZEND_MULTIBYTE
	PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
#endif /* ZEND_MULTIBYTE */
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* Stored directly into the func_overload global; system-level only. */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	/* The handler also re-registers the SAPI POST entries. */
	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* Default pattern: text/ types and application/xhtml+xml. */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): registered via the BOOLEAN macro but updated with
	 * OnUpdateLong -- confirm this pairing is intentional. */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1286: /* }}} */
1287:
1288: /* {{{ module global initialize handler */
1289: static PHP_GINIT_FUNCTION(mbstring)
1290: {
1291: mbstring_globals->language = mbfl_no_language_uni;
1292: mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
1293: mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1294: #ifdef ZEND_MULTIBYTE
1295: mbstring_globals->script_encoding_list = NULL;
1296: mbstring_globals->script_encoding_list_size = 0;
1297: #endif /* ZEND_MULTIBYTE */
1298: mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
1299: mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
1300: mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
1301: mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
1302: mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
1303: mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
1304: mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
1305: mbstring_globals->http_input_list = NULL;
1306: mbstring_globals->http_input_list_size = 0;
1307: mbstring_globals->detect_order_list = NULL;
1308: mbstring_globals->detect_order_list_size = 0;
1309: mbstring_globals->current_detect_order_list = NULL;
1310: mbstring_globals->current_detect_order_list_size = 0;
1311: mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1312: mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1313: mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1314: mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1315: mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1316: mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1317: mbstring_globals->illegalchars = 0;
1318: mbstring_globals->func_overload = 0;
1319: mbstring_globals->encoding_translation = 0;
1320: mbstring_globals->strict_detection = 0;
1321: mbstring_globals->outconv = NULL;
1322: mbstring_globals->http_output_conv_mimetypes = NULL;
1323: #if HAVE_MBREGEX
1324: mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1325: #endif
1326: }
1327: /* }}} */
1328:
1329: /* {{{ PHP_GSHUTDOWN_FUNCTION */
1330: static PHP_GSHUTDOWN_FUNCTION(mbstring)
1331: {
1332: if (mbstring_globals->http_input_list) {
1333: free(mbstring_globals->http_input_list);
1334: }
1335: #ifdef ZEND_MULTIBYTE
1336: if (mbstring_globals->script_encoding_list) {
1337: free(mbstring_globals->script_encoding_list);
1338: }
1339: #endif /* ZEND_MULTIBYTE */
1340: if (mbstring_globals->detect_order_list) {
1341: free(mbstring_globals->detect_order_list);
1342: }
1343: if (mbstring_globals->http_output_conv_mimetypes) {
1344: _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1345: }
1346: #if HAVE_MBREGEX
1347: php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1348: #endif
1349: }
1350: /* }}} */
1351:
1352: /* {{{ PHP_MINIT_FUNCTION(mbstring) */
1353: PHP_MINIT_FUNCTION(mbstring)
1354: {
1355: __mbfl_allocators = &_php_mb_allocators;
1356:
1357: REGISTER_INI_ENTRIES();
1358:
1359: /* This is a global handler. Should not be set in a per-request handler. */
1360: sapi_register_treat_data(mbstr_treat_data);
1361:
1362: /* Post handlers are stored in the thread-local context. */
1363: if (MBSTRG(encoding_translation)) {
1364: sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1365: }
1366:
1367: REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1368: REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1369: REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1370:
1371: REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1372: REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1373: REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1374:
1375: #if HAVE_MBREGEX
1376: PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1377: #endif
1378: return SUCCESS;
1379: }
1380: /* }}} */
1381:
1382: /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
1383: PHP_MSHUTDOWN_FUNCTION(mbstring)
1384: {
1385: UNREGISTER_INI_ENTRIES();
1386:
1387: #if HAVE_MBREGEX
1388: PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1389: #endif
1390:
1391: return SUCCESS;
1392: }
1393: /* }}} */
1394:
1395: /* {{{ PHP_RINIT_FUNCTION(mbstring) */
1396: PHP_RINIT_FUNCTION(mbstring)
1397: {
1398: int n;
1399: enum mbfl_no_encoding *list=NULL, *entry;
1400: zend_function *func, *orig;
1401: const struct mb_overload_def *p;
1402:
1403: MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1404: MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1405: MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1406: MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1407:
1408: MBSTRG(illegalchars) = 0;
1409:
1410: n = 0;
1411: if (MBSTRG(detect_order_list)) {
1412: list = MBSTRG(detect_order_list);
1413: n = MBSTRG(detect_order_list_size);
1414: }
1415: if (n <= 0) {
1416: list = MBSTRG(default_detect_order_list);
1417: n = MBSTRG(default_detect_order_list_size);
1418: }
1419: entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
1420: MBSTRG(current_detect_order_list) = entry;
1421: MBSTRG(current_detect_order_list_size) = n;
1422: while (n > 0) {
1423: *entry++ = *list++;
1424: n--;
1425: }
1426:
1427: /* override original function. */
1428: if (MBSTRG(func_overload)){
1429: p = &(mb_ovld[0]);
1430:
1431: while (p->type > 0) {
1432: if ((MBSTRG(func_overload) & p->type) == p->type &&
1433: zend_hash_find(EG(function_table), p->save_func,
1434: strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1435:
1436: zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1437:
1438: if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1439: php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1440: return FAILURE;
1441: } else {
1442: zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1443:
1444: if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1445: NULL) == FAILURE) {
1446: php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1447: return FAILURE;
1448: }
1449: }
1450: }
1451: p++;
1452: }
1453: }
1454: #if HAVE_MBREGEX
1455: PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1456: #endif
1457: #ifdef ZEND_MULTIBYTE
1458: zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
1459: php_mb_set_zend_encoding(TSRMLS_C);
1460: #endif /* ZEND_MULTIBYTE */
1461:
1462: return SUCCESS;
1463: }
1464: /* }}} */
1465:
1466: /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1467: PHP_RSHUTDOWN_FUNCTION(mbstring)
1468: {
1469: const struct mb_overload_def *p;
1470: zend_function *orig;
1471:
1472: if (MBSTRG(current_detect_order_list) != NULL) {
1473: efree(MBSTRG(current_detect_order_list));
1474: MBSTRG(current_detect_order_list) = NULL;
1475: MBSTRG(current_detect_order_list_size) = 0;
1476: }
1477: if (MBSTRG(outconv) != NULL) {
1478: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1479: mbfl_buffer_converter_delete(MBSTRG(outconv));
1480: MBSTRG(outconv) = NULL;
1481: }
1482:
1483: /* clear http input identification. */
1484: MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
1485: MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
1486: MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
1487: MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
1488: MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
1489:
1490: /* clear overloaded function. */
1491: if (MBSTRG(func_overload)){
1492: p = &(mb_ovld[0]);
1493: while (p->type > 0) {
1494: if ((MBSTRG(func_overload) & p->type) == p->type &&
1495: zend_hash_find(EG(function_table), p->save_func,
1496: strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1497:
1498: zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1499: zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1500: }
1501: p++;
1502: }
1503: }
1504:
1505: #if HAVE_MBREGEX
1506: PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1507: #endif
1508:
1509: return SUCCESS;
1510: }
1511: /* }}} */
1512:
1513: /* {{{ PHP_MINFO_FUNCTION(mbstring) */
1514: PHP_MINFO_FUNCTION(mbstring)
1515: {
1516: php_info_print_table_start();
1517: php_info_print_table_row(2, "Multibyte Support", "enabled");
1518: php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1519: php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1520: php_info_print_table_end();
1521:
1522: php_info_print_table_start();
1523: php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1524: php_info_print_table_end();
1525:
1526: #if HAVE_MBREGEX
1527: PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1528: #endif
1529:
1530: DISPLAY_INI_ENTRIES();
1531: }
1532: /* }}} */
1533:
1534: /* {{{ proto string mb_language([string language])
1535: Sets the current language or Returns the current language as a string */
1536: PHP_FUNCTION(mb_language)
1537: {
1538: char *name = NULL;
1539: int name_len = 0;
1540:
1541: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1542: return;
1543: }
1544: if (name == NULL) {
1545: RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1546: } else {
1547: if (FAILURE == zend_alter_ini_entry(
1548: "mbstring.language", sizeof("mbstring.language"),
1549: name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1550: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1551: RETVAL_FALSE;
1552: } else {
1553: RETVAL_TRUE;
1554: }
1555: }
1556: }
1557: /* }}} */
1558:
1559: /* {{{ proto string mb_internal_encoding([string encoding])
1560: Sets the current internal encoding or Returns the current internal encoding as a string */
1561: PHP_FUNCTION(mb_internal_encoding)
1562: {
1563: char *name = NULL;
1564: int name_len;
1565: enum mbfl_no_encoding no_encoding;
1566:
1567: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1568: RETURN_FALSE;
1569: }
1570: if (name == NULL) {
1571: name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
1572: if (name != NULL) {
1573: RETURN_STRING(name, 1);
1574: } else {
1575: RETURN_FALSE;
1576: }
1577: } else {
1578: no_encoding = mbfl_name2no_encoding(name);
1579: if (no_encoding == mbfl_no_encoding_invalid) {
1580: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1581: RETURN_FALSE;
1582: } else {
1583: MBSTRG(current_internal_encoding) = no_encoding;
1584: #ifdef ZEND_MULTIBYTE
1585: /* TODO: make independent from mbstring.encoding_translation? */
1586: if (MBSTRG(encoding_translation)) {
1587: zend_multibyte_set_internal_encoding(name TSRMLS_CC);
1588: }
1589: #endif /* ZEND_MULTIBYTE */
1590: RETURN_TRUE;
1591: }
1592: }
1593: }
1594: /* }}} */
1595:
1596: /* {{{ proto mixed mb_http_input([string type])
1597: Returns the input encoding */
1598: PHP_FUNCTION(mb_http_input)
1599: {
1600: char *typ = NULL;
1601: int typ_len;
1602: int retname, n;
1603: char *name, *list, *temp;
1604: enum mbfl_no_encoding *entry;
1605: enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
1606:
1607: retname = 1;
1608: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1609: RETURN_FALSE;
1610: }
1611: if (typ == NULL) {
1612: result = MBSTRG(http_input_identify);
1613: } else {
1614: switch (*typ) {
1615: case 'G':
1616: case 'g':
1617: result = MBSTRG(http_input_identify_get);
1618: break;
1619: case 'P':
1620: case 'p':
1621: result = MBSTRG(http_input_identify_post);
1622: break;
1623: case 'C':
1624: case 'c':
1625: result = MBSTRG(http_input_identify_cookie);
1626: break;
1627: case 'S':
1628: case 's':
1629: result = MBSTRG(http_input_identify_string);
1630: break;
1631: case 'I':
1632: case 'i':
1633: array_init(return_value);
1634: entry = MBSTRG(http_input_list);
1635: n = MBSTRG(http_input_list_size);
1636: while (n > 0) {
1637: name = (char *)mbfl_no_encoding2name(*entry);
1638: if (name) {
1639: add_next_index_string(return_value, name, 1);
1640: }
1641: entry++;
1642: n--;
1643: }
1644: retname = 0;
1645: break;
1646: case 'L':
1647: case 'l':
1648: entry = MBSTRG(http_input_list);
1649: n = MBSTRG(http_input_list_size);
1650: list = NULL;
1651: while (n > 0) {
1652: name = (char *)mbfl_no_encoding2name(*entry);
1653: if (name) {
1654: if (list) {
1655: temp = list;
1656: spprintf(&list, 0, "%s,%s", temp, name);
1657: efree(temp);
1658: if (!list) {
1659: break;
1660: }
1661: } else {
1662: list = estrdup(name);
1663: }
1664: }
1665: entry++;
1666: n--;
1667: }
1668: if (!list) {
1669: RETURN_FALSE;
1670: }
1671: RETVAL_STRING(list, 0);
1672: retname = 0;
1673: break;
1674: default:
1675: result = MBSTRG(http_input_identify);
1676: break;
1677: }
1678: }
1679:
1680: if (retname) {
1681: if (result != mbfl_no_encoding_invalid &&
1682: (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
1683: RETVAL_STRING(name, 1);
1684: } else {
1685: RETVAL_FALSE;
1686: }
1687: }
1688: }
1689: /* }}} */
1690:
1691: /* {{{ proto string mb_http_output([string encoding])
1692: Sets the current output_encoding or returns the current output_encoding as a string */
1693: PHP_FUNCTION(mb_http_output)
1694: {
1695: char *name = NULL;
1696: int name_len;
1697: enum mbfl_no_encoding no_encoding;
1698:
1699: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1700: RETURN_FALSE;
1701: }
1702:
1703: if (name == NULL) {
1704: name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
1705: if (name != NULL) {
1706: RETURN_STRING(name, 1);
1707: } else {
1708: RETURN_FALSE;
1709: }
1710: } else {
1711: no_encoding = mbfl_name2no_encoding(name);
1712: if (no_encoding == mbfl_no_encoding_invalid) {
1713: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1714: RETURN_FALSE;
1715: } else {
1716: MBSTRG(current_http_output_encoding) = no_encoding;
1717: RETURN_TRUE;
1718: }
1719: }
1720: }
1721: /* }}} */
1722:
1723: /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
1725: PHP_FUNCTION(mb_detect_order)
1726: {
1727: zval **arg1 = NULL;
1728: int n, size;
1729: enum mbfl_no_encoding *list, *entry;
1730: char *name;
1731:
1732: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1733: return;
1734: }
1735:
1736: if (!arg1) {
1737: array_init(return_value);
1738: entry = MBSTRG(current_detect_order_list);
1739: n = MBSTRG(current_detect_order_list_size);
1740: while (n > 0) {
1741: name = (char *)mbfl_no_encoding2name(*entry);
1742: if (name) {
1743: add_next_index_string(return_value, name, 1);
1744: }
1745: entry++;
1746: n--;
1747: }
1748: } else {
1749: list = NULL;
1750: size = 0;
1751: switch (Z_TYPE_PP(arg1)) {
1752: case IS_ARRAY:
1753: if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1754: if (list) {
1755: efree(list);
1756: }
1757: RETURN_FALSE;
1758: }
1759: break;
1760: default:
1761: convert_to_string_ex(arg1);
1762: if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1763: if (list) {
1764: efree(list);
1765: }
1766: RETURN_FALSE;
1767: }
1768: break;
1769: }
1770:
1771: if (list == NULL) {
1772: RETURN_FALSE;
1773: }
1774:
1775: if (MBSTRG(current_detect_order_list)) {
1776: efree(MBSTRG(current_detect_order_list));
1777: }
1778: MBSTRG(current_detect_order_list) = list;
1779: MBSTRG(current_detect_order_list_size) = size;
1780: RETURN_TRUE;
1781: }
1782: }
1783: /* }}} */
1784:
1785: /* {{{ proto mixed mb_substitute_character([mixed substchar])
1786: Sets the current substitute_character or returns the current substitute_character */
1787: PHP_FUNCTION(mb_substitute_character)
1788: {
1789: zval **arg1 = NULL;
1790:
1791: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1792: return;
1793: }
1794:
1795: if (!arg1) {
1796: if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1797: RETURN_STRING("none", 1);
1798: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1799: RETURN_STRING("long", 1);
1800: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1801: RETURN_STRING("entity", 1);
1802: } else {
1803: RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1804: }
1805: } else {
1806: RETVAL_TRUE;
1807:
1808: switch (Z_TYPE_PP(arg1)) {
1809: case IS_STRING:
1810: if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1811: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1812: } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1813: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1814: } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1815: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1816: } else {
1817: convert_to_long_ex(arg1);
1818:
1819: if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1820: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1821: MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1822: } else {
1823: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1824: RETURN_FALSE;
1825: }
1826: }
1827: break;
1828: default:
1829: convert_to_long_ex(arg1);
1830: if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1831: MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1832: MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1833: } else {
1834: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1835: RETURN_FALSE;
1836: }
1837: break;
1838: }
1839: }
1840: }
1841: /* }}} */
1842:
1843: /* {{{ proto string mb_preferred_mime_name(string encoding)
1844: Return the preferred MIME name (charset) as a string */
1845: PHP_FUNCTION(mb_preferred_mime_name)
1846: {
1847: enum mbfl_no_encoding no_encoding;
1848: char *name = NULL;
1849: int name_len;
1850:
1851: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
1852: return;
1853: } else {
1854: no_encoding = mbfl_name2no_encoding(name);
1855: if (no_encoding == mbfl_no_encoding_invalid) {
1856: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1857: RETVAL_FALSE;
1858: } else {
1859: const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1860: if (preferred_name == NULL || *preferred_name == '\0') {
1861: php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1862: RETVAL_FALSE;
1863: } else {
1864: RETVAL_STRING((char *)preferred_name, 1);
1865: }
1866: }
1867: }
1868: }
1869: /* }}} */
1870:
/* Byte-range predicates; each yields 1 or 0. The argument is evaluated
 * more than once, so it must be free of side effects.
 *   IS_SJIS1: 0x81-0x9f or 0xe0-0xf5
 *   IS_SJIS2: 0x40-0x7e or 0x80-0xfc */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
1873:
1874: /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
1875: Parses GET/POST/COOKIE data and sets global variables */
1876: PHP_FUNCTION(mb_parse_str)
1877: {
1878: zval *track_vars_array = NULL;
1879: char *encstr = NULL;
1880: int encstr_len;
1881: php_mb_encoding_handler_info_t info;
1882: enum mbfl_no_encoding detected;
1883:
1884: track_vars_array = NULL;
1885: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
1886: return;
1887: }
1888:
1889: /* Clear out the array */
1890: if (track_vars_array != NULL) {
1891: zval_dtor(track_vars_array);
1892: array_init(track_vars_array);
1893: }
1894:
1895: encstr = estrndup(encstr, encstr_len);
1896:
1897: info.data_type = PARSE_STRING;
1898: info.separator = PG(arg_separator).input;
1899: info.force_register_globals = (track_vars_array == NULL);
1900: info.report_errors = 1;
1901: info.to_encoding = MBSTRG(current_internal_encoding);
1902: info.to_language = MBSTRG(language);
1903: info.from_encodings = MBSTRG(http_input_list);
1904: info.num_from_encodings = MBSTRG(http_input_list_size);
1905: info.from_language = MBSTRG(language);
1906:
1907: detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
1908:
1909: MBSTRG(http_input_identify) = detected;
1910:
1911: RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
1912:
1913: if (encstr != NULL) efree(encstr);
1914: }
1915: /* }}} */
1916:
1917: /* {{{ proto string mb_output_handler(string contents, int status)
1918: Returns string in output buffer converted to the http_output encoding */
1919: PHP_FUNCTION(mb_output_handler)
1920: {
1921: char *arg_string;
1922: int arg_string_len;
1923: long arg_status;
1924: mbfl_string string, result;
1925: const char *charset;
1926: char *p;
1927: enum mbfl_no_encoding encoding;
1928: int last_feed, len;
1929: unsigned char send_text_mimetype = 0;
1930: char *s, *mimetype = NULL;
1931:
1932: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
1933: return;
1934: }
1935:
1936: encoding = MBSTRG(current_http_output_encoding);
1937:
1938: /* start phase only */
1939: if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1940: /* delete the converter just in case. */
1941: if (MBSTRG(outconv)) {
1942: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1943: mbfl_buffer_converter_delete(MBSTRG(outconv));
1944: MBSTRG(outconv) = NULL;
1945: }
1946: if (encoding == mbfl_no_encoding_pass) {
1947: RETURN_STRINGL(arg_string, arg_string_len, 1);
1948: }
1949:
1950: /* analyze mime type */
1951: if (SG(sapi_headers).mimetype &&
1952: _php_mb_match_regex(
1953: MBSTRG(http_output_conv_mimetypes),
1954: SG(sapi_headers).mimetype,
1955: strlen(SG(sapi_headers).mimetype))) {
1956: if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
1957: mimetype = estrdup(SG(sapi_headers).mimetype);
1958: } else {
1959: mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1960: }
1961: send_text_mimetype = 1;
1962: } else if (SG(sapi_headers).send_default_content_type) {
1963: mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1964: }
1965:
1966: /* if content-type is not yet set, set it and activate the converter */
1967: if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1968: charset = mbfl_no2preferred_mime_name(encoding);
1969: if (charset) {
1970: len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
1971: if (sapi_add_header(p, len, 0) != FAILURE) {
1972: SG(sapi_headers).send_default_content_type = 0;
1973: }
1974: }
1975: /* activate the converter */
1976: MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1977: if (send_text_mimetype){
1978: efree(mimetype);
1979: }
1980: }
1981: }
1982:
1983: /* just return if the converter is not activated. */
1984: if (MBSTRG(outconv) == NULL) {
1985: RETURN_STRINGL(arg_string, arg_string_len, 1);
1986: }
1987:
1988: /* flag */
1989: last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1990: /* mode */
1991: mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1992: mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1993:
1994: /* feed the string */
1995: mbfl_string_init(&string);
1996: string.no_language = MBSTRG(language);
1997: string.no_encoding = MBSTRG(current_internal_encoding);
1998: string.val = (unsigned char *)arg_string;
1999: string.len = arg_string_len;
2000: mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2001: if (last_feed) {
2002: mbfl_buffer_converter_flush(MBSTRG(outconv));
2003: }
2004: /* get the converter output, and return it */
2005: mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2006: RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
2007:
2008: /* delete the converter if it is the last feed. */
2009: if (last_feed) {
2010: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2011: mbfl_buffer_converter_delete(MBSTRG(outconv));
2012: MBSTRG(outconv) = NULL;
2013: }
2014: }
2015: /* }}} */
2016:
2017: /* {{{ proto int mb_strlen(string str [, string encoding])
2018: Get character numbers of a string */
2019: PHP_FUNCTION(mb_strlen)
2020: {
2021: int n;
2022: mbfl_string string;
2023: char *enc_name = NULL;
2024: int enc_name_len;
2025:
2026: mbfl_string_init(&string);
2027:
2028: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2029: RETURN_FALSE;
2030: }
2031:
2032: string.no_language = MBSTRG(language);
2033: if (enc_name == NULL) {
2034: string.no_encoding = MBSTRG(current_internal_encoding);
2035: } else {
2036: string.no_encoding = mbfl_name2no_encoding(enc_name);
2037: if (string.no_encoding == mbfl_no_encoding_invalid) {
2038: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2039: RETURN_FALSE;
2040: }
2041: }
2042:
2043: n = mbfl_strlen(&string);
2044: if (n >= 0) {
2045: RETVAL_LONG(n);
2046: } else {
2047: RETVAL_FALSE;
2048: }
2049: }
2050: /* }}} */
2051:
2052: /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2053: Find position of first occurrence of a string within another */
2054: PHP_FUNCTION(mb_strpos)
2055: {
2056: int n, reverse = 0;
2057: long offset;
2058: mbfl_string haystack, needle;
2059: char *enc_name = NULL;
2060: int enc_name_len;
2061:
2062: mbfl_string_init(&haystack);
2063: mbfl_string_init(&needle);
2064: haystack.no_language = MBSTRG(language);
2065: haystack.no_encoding = MBSTRG(current_internal_encoding);
2066: needle.no_language = MBSTRG(language);
2067: needle.no_encoding = MBSTRG(current_internal_encoding);
2068: offset = 0;
2069:
2070: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2071: RETURN_FALSE;
2072: }
2073:
2074: if (enc_name != NULL) {
2075: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2076: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2077: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2078: RETURN_FALSE;
2079: }
2080: }
2081:
2082: if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2083: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2084: RETURN_FALSE;
2085: }
2086: if (needle.len == 0) {
2087: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2088: RETURN_FALSE;
2089: }
2090:
2091: n = mbfl_strpos(&haystack, &needle, offset, reverse);
2092: if (n >= 0) {
2093: RETVAL_LONG(n);
2094: } else {
2095: switch (-n) {
2096: case 1:
2097: break;
2098: case 2:
2099: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2100: break;
2101: case 4:
2102: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2103: break;
2104: case 8:
2105: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2106: break;
2107: default:
2108: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2109: break;
2110: }
2111: RETVAL_FALSE;
2112: }
2113: }
2114: /* }}} */
2115:
2116: /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2117: Find position of last occurrence of a string within another */
2118: PHP_FUNCTION(mb_strrpos)
2119: {
2120: int n;
2121: mbfl_string haystack, needle;
2122: char *enc_name = NULL;
2123: int enc_name_len;
2124: zval **zoffset = NULL;
2125: long offset = 0, str_flg;
2126: char *enc_name2 = NULL;
2127: int enc_name_len2;
2128:
2129: mbfl_string_init(&haystack);
2130: mbfl_string_init(&needle);
2131: haystack.no_language = MBSTRG(language);
2132: haystack.no_encoding = MBSTRG(current_internal_encoding);
2133: needle.no_language = MBSTRG(language);
2134: needle.no_encoding = MBSTRG(current_internal_encoding);
2135:
2136: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2137: RETURN_FALSE;
2138: }
2139:
2140: if (zoffset) {
2141: if (Z_TYPE_PP(zoffset) == IS_STRING) {
2142: enc_name2 = Z_STRVAL_PP(zoffset);
2143: enc_name_len2 = Z_STRLEN_PP(zoffset);
2144: str_flg = 1;
2145:
2146: if (enc_name2 != NULL) {
2147: switch (*enc_name2) {
2148: case '0':
2149: case '1':
2150: case '2':
2151: case '3':
2152: case '4':
2153: case '5':
2154: case '6':
2155: case '7':
2156: case '8':
2157: case '9':
2158: case ' ':
2159: case '-':
2160: case '.':
2161: break;
2162: default :
2163: str_flg = 0;
2164: break;
2165: }
2166: }
2167:
2168: if (str_flg) {
2169: convert_to_long_ex(zoffset);
2170: offset = Z_LVAL_PP(zoffset);
2171: } else {
2172: enc_name = enc_name2;
2173: enc_name_len = enc_name_len2;
2174: }
2175: } else {
2176: convert_to_long_ex(zoffset);
2177: offset = Z_LVAL_PP(zoffset);
2178: }
2179: }
2180:
2181: if (enc_name != NULL) {
2182: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2183: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2184: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2185: RETURN_FALSE;
2186: }
2187: }
2188:
2189: if (haystack.len <= 0) {
2190: RETURN_FALSE;
2191: }
2192: if (needle.len <= 0) {
2193: RETURN_FALSE;
2194: }
2195:
2196: {
2197: int haystack_char_len = mbfl_strlen(&haystack);
2198: if ((offset > 0 && offset > haystack_char_len) ||
2199: (offset < 0 && -offset > haystack_char_len)) {
2200: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2201: RETURN_FALSE;
2202: }
2203: }
2204:
2205: n = mbfl_strpos(&haystack, &needle, offset, 1);
2206: if (n >= 0) {
2207: RETVAL_LONG(n);
2208: } else {
2209: RETVAL_FALSE;
2210: }
2211: }
2212: /* }}} */
2213:
2214: /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2215: Finds position of first occurrence of a string within another, case insensitive */
2216: PHP_FUNCTION(mb_stripos)
2217: {
2218: int n;
2219: long offset;
2220: mbfl_string haystack, needle;
2221: char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2222: int from_encoding_len;
2223: n = -1;
2224: offset = 0;
2225:
2226: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2227: RETURN_FALSE;
2228: }
2229: if (needle.len == 0) {
2230: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2231: RETURN_FALSE;
2232: }
2233: n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2234:
2235: if (n >= 0) {
2236: RETVAL_LONG(n);
2237: } else {
2238: RETVAL_FALSE;
2239: }
2240: }
2241: /* }}} */
2242:
2243: /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2244: Finds position of last occurrence of a string within another, case insensitive */
2245: PHP_FUNCTION(mb_strripos)
2246: {
2247: int n;
2248: long offset;
2249: mbfl_string haystack, needle;
2250: const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2251: int from_encoding_len;
2252: n = -1;
2253: offset = 0;
2254:
2255: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2256: RETURN_FALSE;
2257: }
2258:
2259: n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2260:
2261: if (n >= 0) {
2262: RETVAL_LONG(n);
2263: } else {
2264: RETVAL_FALSE;
2265: }
2266: }
2267: /* }}} */
2268:
2269: /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2270: Finds first occurrence of a string within another */
2271: PHP_FUNCTION(mb_strstr)
2272: {
2273: int n, len, mblen;
2274: mbfl_string haystack, needle, result, *ret = NULL;
2275: char *enc_name = NULL;
2276: int enc_name_len;
2277: zend_bool part = 0;
2278:
2279: mbfl_string_init(&haystack);
2280: mbfl_string_init(&needle);
2281: haystack.no_language = MBSTRG(language);
2282: haystack.no_encoding = MBSTRG(current_internal_encoding);
2283: needle.no_language = MBSTRG(language);
2284: needle.no_encoding = MBSTRG(current_internal_encoding);
2285:
2286: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2287: RETURN_FALSE;
2288: }
2289:
2290: if (enc_name != NULL) {
2291: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2292: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2293: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2294: RETURN_FALSE;
2295: }
2296: }
2297:
2298: if (needle.len <= 0) {
2299: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2300: RETURN_FALSE;
2301: }
2302: n = mbfl_strpos(&haystack, &needle, 0, 0);
2303: if (n >= 0) {
2304: mblen = mbfl_strlen(&haystack);
2305: if (part) {
2306: ret = mbfl_substr(&haystack, &result, 0, n);
2307: if (ret != NULL) {
2308: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2309: } else {
2310: RETVAL_FALSE;
2311: }
2312: } else {
2313: len = (mblen - n);
2314: ret = mbfl_substr(&haystack, &result, n, len);
2315: if (ret != NULL) {
2316: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2317: } else {
2318: RETVAL_FALSE;
2319: }
2320: }
2321: } else {
2322: RETVAL_FALSE;
2323: }
2324: }
2325: /* }}} */
2326:
2327: /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2328: Finds the last occurrence of a character in a string within another */
2329: PHP_FUNCTION(mb_strrchr)
2330: {
2331: int n, len, mblen;
2332: mbfl_string haystack, needle, result, *ret = NULL;
2333: char *enc_name = NULL;
2334: int enc_name_len;
2335: zend_bool part = 0;
2336:
2337: mbfl_string_init(&haystack);
2338: mbfl_string_init(&needle);
2339: haystack.no_language = MBSTRG(language);
2340: haystack.no_encoding = MBSTRG(current_internal_encoding);
2341: needle.no_language = MBSTRG(language);
2342: needle.no_encoding = MBSTRG(current_internal_encoding);
2343:
2344: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2345: RETURN_FALSE;
2346: }
2347:
2348: if (enc_name != NULL) {
2349: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2350: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2351: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2352: RETURN_FALSE;
2353: }
2354: }
2355:
2356: if (haystack.len <= 0) {
2357: RETURN_FALSE;
2358: }
2359: if (needle.len <= 0) {
2360: RETURN_FALSE;
2361: }
2362: n = mbfl_strpos(&haystack, &needle, 0, 1);
2363: if (n >= 0) {
2364: mblen = mbfl_strlen(&haystack);
2365: if (part) {
2366: ret = mbfl_substr(&haystack, &result, 0, n);
2367: if (ret != NULL) {
2368: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2369: } else {
2370: RETVAL_FALSE;
2371: }
2372: } else {
2373: len = (mblen - n);
2374: ret = mbfl_substr(&haystack, &result, n, len);
2375: if (ret != NULL) {
2376: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2377: } else {
2378: RETVAL_FALSE;
2379: }
2380: }
2381: } else {
2382: RETVAL_FALSE;
2383: }
2384: }
2385: /* }}} */
2386:
2387: /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2388: Finds first occurrence of a string within another, case insensitive */
2389: PHP_FUNCTION(mb_stristr)
2390: {
2391: zend_bool part = 0;
2392: unsigned int from_encoding_len, len, mblen;
2393: int n;
2394: mbfl_string haystack, needle, result, *ret = NULL;
2395: const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2396: mbfl_string_init(&haystack);
2397: mbfl_string_init(&needle);
2398: haystack.no_language = MBSTRG(language);
2399: haystack.no_encoding = MBSTRG(current_internal_encoding);
2400: needle.no_language = MBSTRG(language);
2401: needle.no_encoding = MBSTRG(current_internal_encoding);
2402:
2403:
2404: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2405: RETURN_FALSE;
2406: }
2407:
2408: if (!needle.len) {
2409: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2410: RETURN_FALSE;
2411: }
2412:
2413: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2414: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2415: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2416: RETURN_FALSE;
2417: }
2418:
2419: n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2420:
2421: if (n <0) {
2422: RETURN_FALSE;
2423: }
2424:
2425: mblen = mbfl_strlen(&haystack);
2426:
2427: if (part) {
2428: ret = mbfl_substr(&haystack, &result, 0, n);
2429: if (ret != NULL) {
2430: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2431: } else {
2432: RETVAL_FALSE;
2433: }
2434: } else {
2435: len = (mblen - n);
2436: ret = mbfl_substr(&haystack, &result, n, len);
2437: if (ret != NULL) {
2438: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2439: } else {
2440: RETVAL_FALSE;
2441: }
2442: }
2443: }
2444: /* }}} */
2445:
2446: /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2447: Finds the last occurrence of a character in a string within another, case insensitive */
2448: PHP_FUNCTION(mb_strrichr)
2449: {
2450: zend_bool part = 0;
2451: int n, from_encoding_len, len, mblen;
2452: mbfl_string haystack, needle, result, *ret = NULL;
2453: char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2454: mbfl_string_init(&haystack);
2455: mbfl_string_init(&needle);
2456: haystack.no_language = MBSTRG(language);
2457: haystack.no_encoding = MBSTRG(current_internal_encoding);
2458: needle.no_language = MBSTRG(language);
2459: needle.no_encoding = MBSTRG(current_internal_encoding);
2460:
2461:
2462: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2463: RETURN_FALSE;
2464: }
2465:
2466: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2467: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2468: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2469: RETURN_FALSE;
2470: }
2471:
2472: n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2473:
2474: if (n <0) {
2475: RETURN_FALSE;
2476: }
2477:
2478: mblen = mbfl_strlen(&haystack);
2479:
2480: if (part) {
2481: ret = mbfl_substr(&haystack, &result, 0, n);
2482: if (ret != NULL) {
2483: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2484: } else {
2485: RETVAL_FALSE;
2486: }
2487: } else {
2488: len = (mblen - n);
2489: ret = mbfl_substr(&haystack, &result, n, len);
2490: if (ret != NULL) {
2491: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2492: } else {
2493: RETVAL_FALSE;
2494: }
2495: }
2496: }
2497: /* }}} */
2498:
2499: /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2500: Count the number of substring occurrences */
2501: PHP_FUNCTION(mb_substr_count)
2502: {
2503: int n;
2504: mbfl_string haystack, needle;
2505: char *enc_name = NULL;
2506: int enc_name_len;
2507:
2508: mbfl_string_init(&haystack);
2509: mbfl_string_init(&needle);
2510: haystack.no_language = MBSTRG(language);
2511: haystack.no_encoding = MBSTRG(current_internal_encoding);
2512: needle.no_language = MBSTRG(language);
2513: needle.no_encoding = MBSTRG(current_internal_encoding);
2514:
2515: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2516: return;
2517: }
2518:
2519: if (enc_name != NULL) {
2520: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2521: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2522: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2523: RETURN_FALSE;
2524: }
2525: }
2526:
2527: if (needle.len <= 0) {
2528: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2529: RETURN_FALSE;
2530: }
2531:
2532: n = mbfl_substr_count(&haystack, &needle);
2533: if (n >= 0) {
2534: RETVAL_LONG(n);
2535: } else {
2536: RETVAL_FALSE;
2537: }
2538: }
2539: /* }}} */
2540:
2541: /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2542: Returns part of a string */
2543: PHP_FUNCTION(mb_substr)
2544: {
2545: size_t argc = ZEND_NUM_ARGS();
2546: char *str, *encoding;
2547: long from, len;
2548: int mblen, str_len, encoding_len;
2549: mbfl_string string, result, *ret;
2550:
2551: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2552: return;
2553: }
2554:
2555: mbfl_string_init(&string);
2556: string.no_language = MBSTRG(language);
2557: string.no_encoding = MBSTRG(current_internal_encoding);
2558:
2559: if (argc == 4) {
2560: string.no_encoding = mbfl_name2no_encoding(encoding);
2561: if (string.no_encoding == mbfl_no_encoding_invalid) {
2562: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2563: RETURN_FALSE;
2564: }
2565: }
2566:
2567: string.val = (unsigned char *)str;
2568: string.len = str_len;
2569:
2570: if (argc < 3) {
2571: len = str_len;
2572: }
2573:
2574: /* measures length */
2575: mblen = 0;
2576: if (from < 0 || len < 0) {
2577: mblen = mbfl_strlen(&string);
2578: }
2579:
2580: /* if "from" position is negative, count start position from the end
2581: * of the string
2582: */
2583: if (from < 0) {
2584: from = mblen + from;
2585: if (from < 0) {
2586: from = 0;
2587: }
2588: }
2589:
2590: /* if "length" position is negative, set it to the length
2591: * needed to stop that many chars from the end of the string
2592: */
2593: if (len < 0) {
2594: len = (mblen - from) + len;
2595: if (len < 0) {
2596: len = 0;
2597: }
2598: }
2599:
2600: if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2601: && (from >= mbfl_strlen(&string))) {
2602: RETURN_FALSE;
2603: }
2604:
2605: ret = mbfl_substr(&string, &result, from, len);
2606: if (NULL == ret) {
2607: RETURN_FALSE;
2608: }
2609:
2610: RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2611: }
2612: /* }}} */
2613:
2614: /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2615: Returns part of a string */
2616: PHP_FUNCTION(mb_strcut)
2617: {
2618: size_t argc = ZEND_NUM_ARGS();
2619: char *encoding;
2620: long from, len;
2621: int encoding_len;
2622: mbfl_string string, result, *ret;
2623:
2624: mbfl_string_init(&string);
2625: string.no_language = MBSTRG(language);
2626: string.no_encoding = MBSTRG(current_internal_encoding);
2627:
2628: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2629: return;
2630: }
2631:
2632: if (argc == 4) {
2633: string.no_encoding = mbfl_name2no_encoding(encoding);
2634: if (string.no_encoding == mbfl_no_encoding_invalid) {
2635: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2636: RETURN_FALSE;
2637: }
2638: }
2639:
2640: if (argc < 3) {
2641: len = string.len;
2642: }
2643:
2644: /* if "from" position is negative, count start position from the end
2645: * of the string
2646: */
2647: if (from < 0) {
2648: from = string.len + from;
2649: if (from < 0) {
2650: from = 0;
2651: }
2652: }
2653:
2654: /* if "length" position is negative, set it to the length
2655: * needed to stop that many chars from the end of the string
2656: */
2657: if (len < 0) {
2658: len = (string.len - from) + len;
2659: if (len < 0) {
2660: len = 0;
2661: }
2662: }
2663:
2664: if ((unsigned int)from > string.len) {
2665: RETURN_FALSE;
2666: }
2667:
2668: ret = mbfl_strcut(&string, &result, from, len);
2669: if (ret == NULL) {
2670: RETURN_FALSE;
2671: }
2672:
2673: RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2674: }
2675: /* }}} */
2676:
2677: /* {{{ proto int mb_strwidth(string str [, string encoding])
2678: Gets terminal width of a string */
2679: PHP_FUNCTION(mb_strwidth)
2680: {
2681: int n;
2682: mbfl_string string;
2683: char *enc_name = NULL;
2684: int enc_name_len;
2685:
2686: mbfl_string_init(&string);
2687:
2688: string.no_language = MBSTRG(language);
2689: string.no_encoding = MBSTRG(current_internal_encoding);
2690:
2691: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2692: return;
2693: }
2694:
2695: if (enc_name != NULL) {
2696: string.no_encoding = mbfl_name2no_encoding(enc_name);
2697: if (string.no_encoding == mbfl_no_encoding_invalid) {
2698: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2699: RETURN_FALSE;
2700: }
2701: }
2702:
2703: n = mbfl_strwidth(&string);
2704: if (n >= 0) {
2705: RETVAL_LONG(n);
2706: } else {
2707: RETVAL_FALSE;
2708: }
2709: }
2710: /* }}} */
2711:
2712: /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2713: Trim the string in terminal width */
2714: PHP_FUNCTION(mb_strimwidth)
2715: {
2716: char *str, *trimmarker, *encoding;
2717: long from, width;
2718: int str_len, trimmarker_len, encoding_len;
2719: mbfl_string string, result, marker, *ret;
2720:
2721: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2722: return;
2723: }
2724:
2725: mbfl_string_init(&string);
2726: mbfl_string_init(&marker);
2727: string.no_language = MBSTRG(language);
2728: string.no_encoding = MBSTRG(current_internal_encoding);
2729: marker.no_language = MBSTRG(language);
2730: marker.no_encoding = MBSTRG(current_internal_encoding);
2731: marker.val = NULL;
2732: marker.len = 0;
2733:
2734: if (ZEND_NUM_ARGS() == 5) {
2735: string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2736: if (string.no_encoding == mbfl_no_encoding_invalid) {
2737: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2738: RETURN_FALSE;
2739: }
2740: }
2741:
2742: string.val = (unsigned char *)str;
2743: string.len = str_len;
2744:
2745: if (from < 0 || from > str_len) {
2746: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2747: RETURN_FALSE;
2748: }
2749:
2750: if (width < 0) {
2751: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2752: RETURN_FALSE;
2753: }
2754:
2755: if (ZEND_NUM_ARGS() >= 4) {
2756: marker.val = (unsigned char *)trimmarker;
2757: marker.len = trimmarker_len;
2758: }
2759:
2760: ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2761:
2762: if (ret == NULL) {
2763: RETURN_FALSE;
2764: }
2765:
2766: RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2767: }
2768: /* }}} */
2769:
2770: /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2771: MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2772: {
2773: mbfl_string string, result, *ret;
2774: enum mbfl_no_encoding from_encoding, to_encoding;
2775: mbfl_buffer_converter *convd;
2776: int size, *list;
2777: char *output=NULL;
2778:
2779: if (output_len) {
2780: *output_len = 0;
2781: }
2782: if (!input) {
2783: return NULL;
2784: }
2785: /* new encoding */
2786: if (_to_encoding && strlen(_to_encoding)) {
2787: to_encoding = mbfl_name2no_encoding(_to_encoding);
2788: if (to_encoding == mbfl_no_encoding_invalid) {
2789: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2790: return NULL;
2791: }
2792: } else {
2793: to_encoding = MBSTRG(current_internal_encoding);
2794: }
2795:
2796: /* initialize string */
2797: mbfl_string_init(&string);
2798: mbfl_string_init(&result);
2799: from_encoding = MBSTRG(current_internal_encoding);
2800: string.no_encoding = from_encoding;
2801: string.no_language = MBSTRG(language);
2802: string.val = (unsigned char *)input;
2803: string.len = length;
2804:
2805: /* pre-conversion encoding */
2806: if (_from_encodings) {
2807: list = NULL;
2808: size = 0;
2809: php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2810: if (size == 1) {
2811: from_encoding = *list;
2812: string.no_encoding = from_encoding;
2813: } else if (size > 1) {
2814: /* auto detect */
2815: from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
2816: if (from_encoding != mbfl_no_encoding_invalid) {
2817: string.no_encoding = from_encoding;
2818: } else {
2819: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2820: from_encoding = mbfl_no_encoding_pass;
2821: to_encoding = from_encoding;
2822: string.no_encoding = from_encoding;
2823: }
2824: } else {
2825: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
2826: }
2827: if (list != NULL) {
2828: efree((void *)list);
2829: }
2830: }
2831:
2832: /* initialize converter */
2833: convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2834: if (convd == NULL) {
2835: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
2836: return NULL;
2837: }
2838: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2839: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2840:
2841: /* do it */
2842: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2843: if (ret) {
2844: if (output_len) {
2845: *output_len = ret->len;
2846: }
2847: output = (char *)ret->val;
2848: }
2849:
2850: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2851: mbfl_buffer_converter_delete(convd);
2852: return output;
2853: }
2854: /* }}} */
2855:
2856: /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
2857: Returns converted string in desired encoding */
2858: PHP_FUNCTION(mb_convert_encoding)
2859: {
2860: char *arg_str, *arg_new;
2861: int str_len, new_len;
2862: zval *arg_old;
2863: int i;
2864: size_t size, l, n;
2865: char *_from_encodings = NULL, *ret, *s_free = NULL;
2866:
2867: zval **hash_entry;
2868: HashTable *target_hash;
2869:
2870: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
2871: return;
2872: }
2873:
2874: if (ZEND_NUM_ARGS() == 3) {
2875: switch (Z_TYPE_P(arg_old)) {
2876: case IS_ARRAY:
2877: target_hash = Z_ARRVAL_P(arg_old);
2878: zend_hash_internal_pointer_reset(target_hash);
2879: i = zend_hash_num_elements(target_hash);
2880: _from_encodings = NULL;
2881:
2882: while (i > 0) {
2883: if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
2884: break;
2885: }
2886:
2887: convert_to_string_ex(hash_entry);
2888:
2889: if ( _from_encodings) {
2890: l = strlen(_from_encodings);
2891: n = strlen(Z_STRVAL_PP(hash_entry));
2892: _from_encodings = erealloc(_from_encodings, l+n+2);
2893: strcpy(_from_encodings+l, ",");
2894: strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
2895: } else {
2896: _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
2897: }
2898:
2899: zend_hash_move_forward(target_hash);
2900: i--;
2901: }
2902:
2903: if (_from_encodings != NULL && !strlen(_from_encodings)) {
2904: efree(_from_encodings);
2905: _from_encodings = NULL;
2906: }
2907: s_free = _from_encodings;
2908: break;
2909: default:
2910: convert_to_string(arg_old);
2911: _from_encodings = Z_STRVAL_P(arg_old);
2912: break;
2913: }
2914: }
2915:
2916: /* new encoding */
2917: ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
2918: if (ret != NULL) {
2919: RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
2920: } else {
2921: RETVAL_FALSE;
2922: }
2923:
2924: if ( s_free) {
2925: efree(s_free);
2926: }
2927: }
2928: /* }}} */
2929:
2930: /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
2931: Returns a case-folded version of sourcestring */
2932: PHP_FUNCTION(mb_convert_case)
2933: {
2934: char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2935: int str_len, from_encoding_len;
2936: long case_mode = 0;
2937: char *newstr;
2938: size_t ret_len;
2939:
2940: RETVAL_FALSE;
2941: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
2942: &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
2943: RETURN_FALSE;
2944:
2945: newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2946:
2947: if (newstr) {
2948: RETVAL_STRINGL(newstr, ret_len, 0);
2949: }
2950: }
2951: /* }}} */
2952:
2953: /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
2954: * Returns a uppercased version of sourcestring
2955: */
2956: PHP_FUNCTION(mb_strtoupper)
2957: {
2958: char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2959: int str_len, from_encoding_len;
2960: char *newstr;
2961: size_t ret_len;
2962:
2963: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2964: &from_encoding, &from_encoding_len) == FAILURE) {
2965: return;
2966: }
2967: newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2968:
2969: if (newstr) {
2970: RETURN_STRINGL(newstr, ret_len, 0);
2971: }
2972: RETURN_FALSE;
2973: }
2974: /* }}} */
2975:
2976: /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
2977: * Returns a lowercased version of sourcestring
2978: */
2979: PHP_FUNCTION(mb_strtolower)
2980: {
2981: char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2982: int str_len, from_encoding_len;
2983: char *newstr;
2984: size_t ret_len;
2985:
2986: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2987: &from_encoding, &from_encoding_len) == FAILURE) {
2988: return;
2989: }
2990: newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2991:
2992: if (newstr) {
2993: RETURN_STRINGL(newstr, ret_len, 0);
2994: }
2995: RETURN_FALSE;
2996: }
2997: /* }}} */
2998:
2999: /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3000: Encodings of the given string is returned (as a string) */
3001: PHP_FUNCTION(mb_detect_encoding)
3002: {
3003: char *str;
3004: int str_len;
3005: zend_bool strict=0;
3006: zval *encoding_list;
3007:
3008: mbfl_string string;
3009: const char *ret;
3010: enum mbfl_no_encoding *elist;
3011: int size, *list;
3012:
3013: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3014: return;
3015: }
3016:
3017: /* make encoding list */
3018: list = NULL;
3019: size = 0;
3020: if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3021: switch (Z_TYPE_P(encoding_list)) {
3022: case IS_ARRAY:
3023: if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3024: if (list) {
3025: efree(list);
3026: list = NULL;
3027: size = 0;
3028: }
3029: }
3030: break;
3031: default:
3032: convert_to_string(encoding_list);
3033: if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3034: if (list) {
3035: efree(list);
3036: list = NULL;
3037: size = 0;
3038: }
3039: }
3040: break;
3041: }
3042: if (size <= 0) {
3043: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3044: }
3045: }
3046:
3047: if (ZEND_NUM_ARGS() < 3) {
3048: strict = (zend_bool)MBSTRG(strict_detection);
3049: }
3050:
3051: if (size > 0 && list != NULL) {
3052: elist = list;
3053: } else {
3054: elist = MBSTRG(current_detect_order_list);
3055: size = MBSTRG(current_detect_order_list_size);
3056: }
3057:
3058: mbfl_string_init(&string);
3059: string.no_language = MBSTRG(language);
3060: string.val = (unsigned char *)str;
3061: string.len = str_len;
3062: ret = mbfl_identify_encoding_name(&string, elist, size, strict);
3063:
3064: if (list != NULL) {
3065: efree((void *)list);
3066: }
3067:
3068: if (ret == NULL) {
3069: RETURN_FALSE;
3070: }
3071:
3072: RETVAL_STRING((char *)ret, 1);
3073: }
3074: /* }}} */
3075:
3076: /* {{{ proto mixed mb_list_encodings()
3077: Returns an array of all supported entity encodings */
3078: PHP_FUNCTION(mb_list_encodings)
3079: {
3080: const mbfl_encoding **encodings;
3081: const mbfl_encoding *encoding;
3082: int i;
3083:
3084: array_init(return_value);
3085: i = 0;
3086: encodings = mbfl_get_supported_encodings();
3087: while ((encoding = encodings[i++]) != NULL) {
3088: add_next_index_string(return_value, (char *) encoding->name, 1);
3089: }
3090: }
3091: /* }}} */
3092:
3093: /* {{{ proto array mb_encoding_aliases(string encoding)
3094: Returns an array of the aliases of a given encoding name */
3095: PHP_FUNCTION(mb_encoding_aliases)
3096: {
3097: const mbfl_encoding *encoding;
3098: char *name = NULL;
3099: int name_len;
3100:
3101: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3102: RETURN_FALSE;
3103: }
3104:
3105: encoding = mbfl_name2encoding(name);
3106: if (!encoding) {
3107: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3108: RETURN_FALSE;
3109: }
3110:
3111: array_init(return_value);
3112: if (encoding->aliases != NULL) {
3113: const char **alias;
3114: for (alias = *encoding->aliases; *alias; ++alias) {
3115: add_next_index_string(return_value, (char *)*alias, 1);
3116: }
3117: }
3118: }
3119: /* }}} */
3120:
3121: /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3122: Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3123: PHP_FUNCTION(mb_encode_mimeheader)
3124: {
3125: enum mbfl_no_encoding charset, transenc;
3126: mbfl_string string, result, *ret;
3127: char *charset_name = NULL;
3128: int charset_name_len;
3129: char *trans_enc_name = NULL;
3130: int trans_enc_name_len;
3131: char *linefeed = "\r\n";
3132: int linefeed_len;
3133: long indent = 0;
3134:
3135: mbfl_string_init(&string);
3136: string.no_language = MBSTRG(language);
3137: string.no_encoding = MBSTRG(current_internal_encoding);
3138:
3139: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3140: return;
3141: }
3142:
3143: charset = mbfl_no_encoding_pass;
3144: transenc = mbfl_no_encoding_base64;
3145:
3146: if (charset_name != NULL) {
3147: charset = mbfl_name2no_encoding(charset_name);
3148: if (charset == mbfl_no_encoding_invalid) {
3149: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3150: RETURN_FALSE;
3151: }
3152: } else {
3153: const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3154: if (lang != NULL) {
3155: charset = lang->mail_charset;
3156: transenc = lang->mail_header_encoding;
3157: }
3158: }
3159:
3160: if (trans_enc_name != NULL) {
3161: if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3162: transenc = mbfl_no_encoding_base64;
3163: } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3164: transenc = mbfl_no_encoding_qprint;
3165: }
3166: }
3167:
3168: mbfl_string_init(&result);
3169: ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3170: if (ret != NULL) {
3171: RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
3172: } else {
3173: RETVAL_FALSE;
3174: }
3175: }
3176: /* }}} */
3177:
3178: /* {{{ proto string mb_decode_mimeheader(string string)
3179: Decodes the MIME "encoded-word" in the string */
3180: PHP_FUNCTION(mb_decode_mimeheader)
3181: {
3182: mbfl_string string, result, *ret;
3183:
3184: mbfl_string_init(&string);
3185: string.no_language = MBSTRG(language);
3186: string.no_encoding = MBSTRG(current_internal_encoding);
3187:
3188: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3189: return;
3190: }
3191:
3192: mbfl_string_init(&result);
3193: ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3194: if (ret != NULL) {
3195: RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
3196: } else {
3197: RETVAL_FALSE;
3198: }
3199: }
3200: /* }}} */
3201:
3202: /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3203: Conversion between full-width character and half-width character (Japanese) */
3204: PHP_FUNCTION(mb_convert_kana)
3205: {
3206: int opt, i;
3207: mbfl_string string, result, *ret;
3208: char *optstr = NULL;
3209: int optstr_len;
3210: char *encname = NULL;
3211: int encname_len;
3212:
3213: mbfl_string_init(&string);
3214: string.no_language = MBSTRG(language);
3215: string.no_encoding = MBSTRG(current_internal_encoding);
3216:
3217: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3218: return;
3219: }
3220:
3221: /* option */
3222: if (optstr != NULL) {
3223: char *p = optstr;
3224: int n = optstr_len;
3225: i = 0;
3226: opt = 0;
3227: while (i < n) {
3228: i++;
3229: switch (*p++) {
3230: case 'A':
3231: opt |= 0x1;
3232: break;
3233: case 'a':
3234: opt |= 0x10;
3235: break;
3236: case 'R':
3237: opt |= 0x2;
3238: break;
3239: case 'r':
3240: opt |= 0x20;
3241: break;
3242: case 'N':
3243: opt |= 0x4;
3244: break;
3245: case 'n':
3246: opt |= 0x40;
3247: break;
3248: case 'S':
3249: opt |= 0x8;
3250: break;
3251: case 's':
3252: opt |= 0x80;
3253: break;
3254: case 'K':
3255: opt |= 0x100;
3256: break;
3257: case 'k':
3258: opt |= 0x1000;
3259: break;
3260: case 'H':
3261: opt |= 0x200;
3262: break;
3263: case 'h':
3264: opt |= 0x2000;
3265: break;
3266: case 'V':
3267: opt |= 0x800;
3268: break;
3269: case 'C':
3270: opt |= 0x10000;
3271: break;
3272: case 'c':
3273: opt |= 0x20000;
3274: break;
3275: case 'M':
3276: opt |= 0x100000;
3277: break;
3278: case 'm':
3279: opt |= 0x200000;
3280: break;
3281: }
3282: }
3283: } else {
3284: opt = 0x900;
3285: }
3286:
3287: /* encoding */
3288: if (encname != NULL) {
3289: string.no_encoding = mbfl_name2no_encoding(encname);
3290: if (string.no_encoding == mbfl_no_encoding_invalid) {
3291: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3292: RETURN_FALSE;
3293: }
3294: }
3295:
3296: ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3297: if (ret != NULL) {
3298: RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3299: } else {
3300: RETVAL_FALSE;
3301: }
3302: }
3303: /* }}} */
3304:
3305: #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3306:
3307: /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3308: Converts the string resource in variables to desired encoding */
3309: PHP_FUNCTION(mb_convert_variables)
3310: {
3311: zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3312: HashTable *target_hash;
3313: mbfl_string string, result, *ret;
3314: enum mbfl_no_encoding from_encoding, to_encoding;
3315: mbfl_encoding_detector *identd;
3316: mbfl_buffer_converter *convd;
3317: int n, to_enc_len, argc, stack_level, stack_max, elistsz;
3318: enum mbfl_no_encoding *elist;
3319: char *name, *to_enc;
3320: void *ptmp;
3321:
3322: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3323: return;
3324: }
3325:
3326: /* new encoding */
3327: to_encoding = mbfl_name2no_encoding(to_enc);
3328: if (to_encoding == mbfl_no_encoding_invalid) {
3329: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3330: efree(args);
3331: RETURN_FALSE;
3332: }
3333:
3334: /* initialize string */
3335: mbfl_string_init(&string);
3336: mbfl_string_init(&result);
3337: from_encoding = MBSTRG(current_internal_encoding);
3338: string.no_encoding = from_encoding;
3339: string.no_language = MBSTRG(language);
3340:
3341: /* pre-conversion encoding */
3342: elist = NULL;
3343: elistsz = 0;
3344: switch (Z_TYPE_PP(zfrom_enc)) {
3345: case IS_ARRAY:
3346: php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3347: break;
3348: default:
3349: convert_to_string_ex(zfrom_enc);
3350: php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3351: break;
3352: }
3353: if (elistsz <= 0) {
3354: from_encoding = mbfl_no_encoding_pass;
3355: } else if (elistsz == 1) {
3356: from_encoding = *elist;
3357: } else {
3358: /* auto detect */
3359: from_encoding = mbfl_no_encoding_invalid;
3360: stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3361: stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3362: stack_level = 0;
3363: identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3364: if (identd != NULL) {
3365: n = 0;
3366: while (n < argc || stack_level > 0) {
3367: if (stack_level <= 0) {
3368: var = args[n++];
3369: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3370: target_hash = HASH_OF(*var);
3371: if (target_hash != NULL) {
3372: zend_hash_internal_pointer_reset(target_hash);
3373: }
3374: }
3375: } else {
3376: stack_level--;
3377: var = stack[stack_level];
3378: }
3379: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3380: target_hash = HASH_OF(*var);
3381: if (target_hash != NULL) {
3382: while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3383: zend_hash_move_forward(target_hash);
3384: if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3385: if (stack_level >= stack_max) {
3386: stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3387: ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3388: stack = (zval ***)ptmp;
3389: }
3390: stack[stack_level] = var;
3391: stack_level++;
3392: var = hash_entry;
3393: target_hash = HASH_OF(*var);
3394: if (target_hash != NULL) {
3395: zend_hash_internal_pointer_reset(target_hash);
3396: continue;
3397: }
3398: } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3399: string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3400: string.len = Z_STRLEN_PP(hash_entry);
3401: if (mbfl_encoding_detector_feed(identd, &string)) {
3402: goto detect_end; /* complete detecting */
3403: }
3404: }
3405: }
3406: }
3407: } else if (Z_TYPE_PP(var) == IS_STRING) {
3408: string.val = (unsigned char *)Z_STRVAL_PP(var);
3409: string.len = Z_STRLEN_PP(var);
3410: if (mbfl_encoding_detector_feed(identd, &string)) {
3411: goto detect_end; /* complete detecting */
3412: }
3413: }
3414: }
3415: detect_end:
3416: from_encoding = mbfl_encoding_detector_judge(identd);
3417: mbfl_encoding_detector_delete(identd);
3418: }
3419: efree(stack);
3420:
3421: if (from_encoding == mbfl_no_encoding_invalid) {
3422: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3423: from_encoding = mbfl_no_encoding_pass;
3424: }
3425: }
3426: if (elist != NULL) {
3427: efree((void *)elist);
3428: }
3429: /* create converter */
3430: convd = NULL;
3431: if (from_encoding != mbfl_no_encoding_pass) {
3432: convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3433: if (convd == NULL) {
3434: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3435: RETURN_FALSE;
3436: }
3437: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3438: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3439: }
3440:
3441: /* convert */
3442: if (convd != NULL) {
3443: stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3444: stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3445: stack_level = 0;
3446: n = 0;
3447: while (n < argc || stack_level > 0) {
3448: if (stack_level <= 0) {
3449: var = args[n++];
3450: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3451: target_hash = HASH_OF(*var);
3452: if (target_hash != NULL) {
3453: zend_hash_internal_pointer_reset(target_hash);
3454: }
3455: }
3456: } else {
3457: stack_level--;
3458: var = stack[stack_level];
3459: }
3460: if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3461: target_hash = HASH_OF(*var);
3462: if (target_hash != NULL) {
3463: while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3464: zend_hash_move_forward(target_hash);
3465: if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3466: if (stack_level >= stack_max) {
3467: stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3468: ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3469: stack = (zval ***)ptmp;
3470: }
3471: stack[stack_level] = var;
3472: stack_level++;
3473: var = hash_entry;
3474: SEPARATE_ZVAL(hash_entry);
3475: target_hash = HASH_OF(*var);
3476: if (target_hash != NULL) {
3477: zend_hash_internal_pointer_reset(target_hash);
3478: continue;
3479: }
3480: } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3481: string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3482: string.len = Z_STRLEN_PP(hash_entry);
3483: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3484: if (ret != NULL) {
3485: if (Z_REFCOUNT_PP(hash_entry) > 1) {
3486: Z_DELREF_PP(hash_entry);
3487: MAKE_STD_ZVAL(*hash_entry);
3488: } else {
3489: zval_dtor(*hash_entry);
3490: }
3491: ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3492: }
3493: }
3494: }
3495: }
3496: } else if (Z_TYPE_PP(var) == IS_STRING) {
3497: string.val = (unsigned char *)Z_STRVAL_PP(var);
3498: string.len = Z_STRLEN_PP(var);
3499: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3500: if (ret != NULL) {
3501: zval_dtor(*var);
3502: ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3503: }
3504: }
3505: }
3506: efree(stack);
3507:
3508: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3509: mbfl_buffer_converter_delete(convd);
3510: }
3511:
3512: efree(args);
3513:
3514: name = (char *)mbfl_no_encoding2name(from_encoding);
3515: if (name != NULL) {
3516: RETURN_STRING(name, 1);
3517: } else {
3518: RETURN_FALSE;
3519: }
3520: }
3521: /* }}} */
3522:
3523: /* {{{ HTML numeric entity */
3524: /* {{{ static void php_mb_numericentity_exec() */
3525: static void
3526: php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3527: {
3528: char *str, *encoding;
3529: int str_len, encoding_len;
3530: zval *zconvmap, **hash_entry;
3531: HashTable *target_hash;
3532: size_t argc = ZEND_NUM_ARGS();
3533: int i, *convmap, *mapelm, mapsize=0;
3534: mbfl_string string, result, *ret;
3535: enum mbfl_no_encoding no_encoding;
3536:
3537: if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
3538: return;
3539: }
3540:
3541: mbfl_string_init(&string);
3542: string.no_language = MBSTRG(language);
3543: string.no_encoding = MBSTRG(current_internal_encoding);
3544: string.val = (unsigned char *)str;
3545: string.len = str_len;
3546:
3547: /* encoding */
3548: if (argc == 3) {
3549: no_encoding = mbfl_name2no_encoding(encoding);
3550: if (no_encoding == mbfl_no_encoding_invalid) {
3551: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3552: RETURN_FALSE;
3553: } else {
3554: string.no_encoding = no_encoding;
3555: }
3556: }
3557:
3558: /* conversion map */
3559: convmap = NULL;
3560: if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3561: target_hash = Z_ARRVAL_P(zconvmap);
3562: zend_hash_internal_pointer_reset(target_hash);
3563: i = zend_hash_num_elements(target_hash);
3564: if (i > 0) {
3565: convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3566: mapelm = convmap;
3567: mapsize = 0;
3568: while (i > 0) {
3569: if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3570: break;
3571: }
3572: convert_to_long_ex(hash_entry);
3573: *mapelm++ = Z_LVAL_PP(hash_entry);
3574: mapsize++;
3575: i--;
3576: zend_hash_move_forward(target_hash);
3577: }
3578: }
3579: }
3580: if (convmap == NULL) {
3581: RETURN_FALSE;
3582: }
3583: mapsize /= 4;
3584:
3585: ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3586: if (ret != NULL) {
3587: RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3588: } else {
3589: RETVAL_FALSE;
3590: }
3591: efree((void *)convmap);
3592: }
3593: /* }}} */
3594:
3595: /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
3596: Converts specified characters to HTML numeric entities */
3597: PHP_FUNCTION(mb_encode_numericentity)
3598: {
3599: php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3600: }
3601: /* }}} */
3602:
3603: /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3604: Converts HTML numeric entities to character code */
3605: PHP_FUNCTION(mb_decode_numericentity)
3606: {
3607: php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3608: }
3609: /* }}} */
3610: /* }}} */
3611:
3612: /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3613: * Sends an email message with MIME scheme
3614: */
3615:
/*
 * If str[pos] starts a folded-header separator (CR LF followed by a space or
 * tab), advance pos to the last whitespace character of that separator and
 * `continue` the enclosing loop. Because it executes `continue`, this macro
 * must be expanded directly inside a loop body and cannot be wrapped in
 * do { } while (0).
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {	\
		pos += 2;											\
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {						\
			pos++;											\
		}												\
		continue;											\
	}

/*
 * Replace every NUL byte inside the first `len` bytes of `str` with a space.
 * Uses the caller's `char *pp, *ee` locals as scratch. Wrapped in
 * do { } while (0) so that it behaves as a single statement and no longer
 * depends on a blank line following the definition.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do {		\
		pp = (str);					\
		ee = pp + (len);				\
		while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
			*pp = ' ';				\
		}						\
	} while (0)

/*
 * Append one character to the local smart_str `token`. While the token is
 * still a window into the caller's buffer (a == 0) only its length grows;
 * once it owns its storage (a > 0) the character is really appended.
 */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)

/*
 * Turn a borrowed smart_str (a == 0) into one that owns a private copy of
 * its bytes. The capacity is the smallest power of two not below len; one
 * extra byte is allocated beyond that capacity.
 */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3652:
3653: static void my_smart_str_dtor(smart_str *s)
3654: {
3655: if (s->a > 0) {
3656: smart_str_free(s);
3657: }
3658: }
3659:
3660: static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3661: {
3662: const char *ps;
3663: size_t icnt;
3664: int state = 0;
3665: int crlf_state = -1;
3666:
3667: smart_str token = { 0, 0, 0 };
3668: smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3669:
3670: ps = str;
3671: icnt = str_len;
3672:
3673: /*
3674: * C o n t e n t - T y p e : t e x t / h t m l \r\n
3675: * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3676: * state 0 1 2 3
3677: *
3678: * C o n t e n t - T y p e : t e x t / h t m l \r\n
3679: * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3680: * crlf_state -1 0 1 -1
3681: *
3682: */
3683:
3684: while (icnt > 0) {
3685: switch (*ps) {
3686: case ':':
3687: if (crlf_state == 1) {
3688: APPEND_ONE_CHAR('\r');
3689: }
3690:
3691: if (state == 0 || state == 1) {
3692: fld_name = token;
3693:
3694: state = 2;
3695: } else {
3696: APPEND_ONE_CHAR(*ps);
3697: }
3698:
3699: crlf_state = 0;
3700: break;
3701:
3702: case '\n':
3703: if (crlf_state == -1) {
3704: goto out;
3705: }
3706: crlf_state = -1;
3707: break;
3708:
3709: case '\r':
3710: if (crlf_state == 1) {
3711: APPEND_ONE_CHAR('\r');
3712: } else {
3713: crlf_state = 1;
3714: }
3715: break;
3716:
3717: case ' ': case '\t':
3718: if (crlf_state == -1) {
3719: if (state == 3) {
3720: /* continuing from the previous line */
3721: SEPARATE_SMART_STR(&token);
3722: state = 4;
3723: } else {
3724: /* simply skipping this new line */
3725: state = 5;
3726: }
3727: } else {
3728: if (crlf_state == 1) {
3729: APPEND_ONE_CHAR('\r');
3730: }
3731: if (state == 1 || state == 3) {
3732: APPEND_ONE_CHAR(*ps);
3733: }
3734: }
3735: crlf_state = 0;
3736: break;
3737:
3738: default:
3739: switch (state) {
3740: case 0:
3741: token.c = (char *)ps;
3742: token.len = 0;
3743: token.a = 0;
3744: state = 1;
3745: break;
3746:
3747: case 2:
3748: if (crlf_state != -1) {
3749: token.c = (char *)ps;
3750: token.len = 0;
3751: token.a = 0;
3752:
3753: state = 3;
3754: break;
3755: }
3756: /* break is missing intentionally */
3757:
3758: case 3:
3759: if (crlf_state == -1) {
3760: fld_val = token;
3761:
3762: if (fld_name.c != NULL && fld_val.c != NULL) {
3763: char *dummy;
3764:
3765: /* FIXME: some locale free implementation is
3766: * really required here,,, */
3767: SEPARATE_SMART_STR(&fld_name);
3768: php_strtoupper(fld_name.c, fld_name.len);
3769:
3770: zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3771:
3772: my_smart_str_dtor(&fld_name);
3773: }
3774:
3775: memset(&fld_name, 0, sizeof(smart_str));
3776: memset(&fld_val, 0, sizeof(smart_str));
3777:
3778: token.c = (char *)ps;
3779: token.len = 0;
3780: token.a = 0;
3781:
3782: state = 1;
3783: }
3784: break;
3785:
3786: case 4:
3787: APPEND_ONE_CHAR(' ');
3788: state = 3;
3789: break;
3790: }
3791:
3792: if (crlf_state == 1) {
3793: APPEND_ONE_CHAR('\r');
3794: }
3795:
3796: APPEND_ONE_CHAR(*ps);
3797:
3798: crlf_state = 0;
3799: break;
3800: }
3801: ps++, icnt--;
3802: }
3803: out:
3804: if (state == 2) {
3805: token.c = "";
3806: token.len = 0;
3807: token.a = 0;
3808:
3809: state = 3;
3810: }
3811: if (state == 3) {
3812: fld_val = token;
3813:
3814: if (fld_name.c != NULL && fld_val.c != NULL) {
3815: void *dummy;
3816:
3817: /* FIXME: some locale free implementation is
3818: * really required here,,, */
3819: SEPARATE_SMART_STR(&fld_name);
3820: php_strtoupper(fld_name.c, fld_name.len);
3821:
3822: zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3823:
3824: my_smart_str_dtor(&fld_name);
3825: }
3826: }
3827: return state;
3828: }
3829:
3830: PHP_FUNCTION(mb_send_mail)
3831: {
3832: int n;
3833: char *to = NULL;
3834: int to_len;
3835: char *message = NULL;
3836: int message_len;
3837: char *headers = NULL;
3838: int headers_len;
3839: char *subject = NULL;
3840: int subject_len;
3841: char *extra_cmd = NULL;
3842: int extra_cmd_len;
3843: int i;
3844: char *to_r = NULL;
3845: char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3846: struct {
3847: int cnt_type:1;
3848: int cnt_trans_enc:1;
3849: } suppressed_hdrs = { 0, 0 };
3850:
3851: char *message_buf = NULL, *subject_buf = NULL, *p;
3852: mbfl_string orig_str, conv_str;
3853: mbfl_string *pstr; /* pointer to mbfl string for return value */
3854: enum mbfl_no_encoding
3855: tran_cs, /* transfar text charset */
3856: head_enc, /* header transfar encoding */
3857: body_enc; /* body transfar encoding */
3858: mbfl_memory_device device; /* automatic allocateable buffer for additional header */
3859: const mbfl_language *lang;
3860: int err = 0;
3861: HashTable ht_headers;
3862: smart_str *s;
3863: extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3864: char *pp, *ee;
3865:
3866: if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
3867: php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE.");
3868: RETURN_FALSE;
3869: }
3870:
3871: /* initialize */
3872: mbfl_memory_device_init(&device, 0, 0);
3873: mbfl_string_init(&orig_str);
3874: mbfl_string_init(&conv_str);
3875:
3876: /* character-set, transfer-encoding */
3877: tran_cs = mbfl_no_encoding_utf8;
3878: head_enc = mbfl_no_encoding_base64;
3879: body_enc = mbfl_no_encoding_base64;
3880: lang = mbfl_no2language(MBSTRG(language));
3881: if (lang != NULL) {
3882: tran_cs = lang->mail_charset;
3883: head_enc = lang->mail_header_encoding;
3884: body_enc = lang->mail_body_encoding;
3885: }
3886:
3887: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
3888: return;
3889: }
3890:
3891: /* ASCIIZ check */
3892: MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
3893: MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
3894: MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
3895: if (headers) {
3896: MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
3897: }
3898: if (extra_cmd) {
3899: MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
3900: }
3901:
3902: zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
3903:
3904: if (headers != NULL) {
3905: _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
3906: }
3907:
3908: if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
3909: char *tmp;
3910: char *param_name;
3911: char *charset = NULL;
3912:
3913: SEPARATE_SMART_STR(s);
3914: smart_str_0(s);
3915:
3916: p = strchr(s->c, ';');
3917:
3918: if (p != NULL) {
3919: /* skipping the padded spaces */
3920: do {
3921: ++p;
3922: } while (*p == ' ' || *p == '\t');
3923:
3924: if (*p != '\0') {
3925: if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3926: if (strcasecmp(param_name, "charset") == 0) {
3927: enum mbfl_no_encoding _tran_cs = tran_cs;
3928:
3929: charset = php_strtok_r(NULL, "= \"", &tmp);
3930: if (charset != NULL) {
3931: _tran_cs = mbfl_name2no_encoding(charset);
3932: }
3933:
3934: if (_tran_cs == mbfl_no_encoding_invalid) {
3935: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3936: _tran_cs = mbfl_no_encoding_ascii;
3937: }
3938: tran_cs = _tran_cs;
3939: }
3940: }
3941: }
3942: }
3943: suppressed_hdrs.cnt_type = 1;
3944: }
3945:
3946: if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
3947: enum mbfl_no_encoding _body_enc;
3948: SEPARATE_SMART_STR(s);
3949: smart_str_0(s);
3950:
3951: _body_enc = mbfl_name2no_encoding(s->c);
3952: switch (_body_enc) {
3953: case mbfl_no_encoding_base64:
3954: case mbfl_no_encoding_7bit:
3955: case mbfl_no_encoding_8bit:
3956: body_enc = _body_enc;
3957: break;
3958:
3959: default:
3960: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
3961: body_enc = mbfl_no_encoding_8bit;
3962: break;
3963: }
3964: suppressed_hdrs.cnt_trans_enc = 1;
3965: }
3966:
3967: /* To: */
3968: if (to != NULL) {
3969: if (to_len > 0) {
3970: to_r = estrndup(to, to_len);
3971: for (; to_len; to_len--) {
3972: if (!isspace((unsigned char) to_r[to_len - 1])) {
3973: break;
3974: }
3975: to_r[to_len - 1] = '\0';
3976: }
3977: for (i = 0; to_r[i]; i++) {
3978: if (iscntrl((unsigned char) to_r[i])) {
3979: /* According to RFC 822, section 3.1.1 long headers may be separated into
3980: * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3981: * To prevent these separators from being replaced with a space, we use the
3982: * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3983: */
3984: SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3985: to_r[i] = ' ';
3986: }
3987: }
3988: } else {
3989: to_r = to;
3990: }
3991: } else {
3992: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
3993: err = 1;
3994: }
3995:
3996: /* Subject: */
3997: if (subject != NULL && subject_len >= 0) {
3998: orig_str.no_language = MBSTRG(language);
3999: orig_str.val = (unsigned char *)subject;
4000: orig_str.len = subject_len;
4001: orig_str.no_encoding = MBSTRG(current_internal_encoding);
4002: if (orig_str.no_encoding == mbfl_no_encoding_invalid
4003: || orig_str.no_encoding == mbfl_no_encoding_pass) {
4004: orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4005: }
4006: pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4007: if (pstr != NULL) {
4008: subject_buf = subject = (char *)pstr->val;
4009: }
4010: } else {
4011: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4012: err = 1;
4013: }
4014:
4015: /* message body */
4016: if (message != NULL) {
4017: orig_str.no_language = MBSTRG(language);
4018: orig_str.val = (unsigned char *)message;
4019: orig_str.len = (unsigned int)message_len;
4020: orig_str.no_encoding = MBSTRG(current_internal_encoding);
4021:
4022: if (orig_str.no_encoding == mbfl_no_encoding_invalid
4023: || orig_str.no_encoding == mbfl_no_encoding_pass) {
4024: orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4025: }
4026:
4027: pstr = NULL;
4028: {
4029: mbfl_string tmpstr;
4030:
4031: if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4032: tmpstr.no_encoding=mbfl_no_encoding_8bit;
4033: pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4034: efree(tmpstr.val);
4035: }
4036: }
4037: if (pstr != NULL) {
4038: message_buf = message = (char *)pstr->val;
4039: }
4040: } else {
4041: /* this is not really an error, so it is allowed. */
4042: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4043: message = NULL;
4044: }
4045:
4046: /* other headers */
4047: #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4048: #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4049: #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4050: #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4051: if (headers != NULL) {
4052: p = headers;
4053: n = headers_len;
4054: mbfl_memory_device_strncat(&device, p, n);
4055: if (n > 0 && p[n - 1] != '\n') {
4056: mbfl_memory_device_strncat(&device, "\n", 1);
4057: }
4058: }
4059:
4060: if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4061: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4062: mbfl_memory_device_strncat(&device, "\n", 1);
4063: }
4064:
4065: if (!suppressed_hdrs.cnt_type) {
4066: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4067:
4068: p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4069: if (p != NULL) {
4070: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4071: mbfl_memory_device_strcat(&device, p);
4072: }
4073: mbfl_memory_device_strncat(&device, "\n", 1);
4074: }
4075: if (!suppressed_hdrs.cnt_trans_enc) {
4076: mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4077: p = (char *)mbfl_no2preferred_mime_name(body_enc);
4078: if (p == NULL) {
4079: p = "7bit";
4080: }
4081: mbfl_memory_device_strcat(&device, p);
4082: mbfl_memory_device_strncat(&device, "\n", 1);
4083: }
4084:
4085: mbfl_memory_device_unput(&device);
4086: mbfl_memory_device_output('\0', &device);
4087: headers = (char *)device.buffer;
4088:
4089: if (force_extra_parameters) {
4090: extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4091: } else if (extra_cmd) {
4092: extra_cmd = php_escape_shell_cmd(extra_cmd);
4093: }
4094:
4095: if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4096: RETVAL_TRUE;
4097: } else {
4098: RETVAL_FALSE;
4099: }
4100:
4101: if (extra_cmd) {
4102: efree(extra_cmd);
4103: }
4104: if (to_r != to) {
4105: efree(to_r);
4106: }
4107: if (subject_buf) {
4108: efree((void *)subject_buf);
4109: }
4110: if (message_buf) {
4111: efree((void *)message_buf);
4112: }
4113: mbfl_memory_device_clear(&device);
4114: zend_hash_destroy(&ht_headers);
4115: }
4116:
4117: #undef SKIP_LONG_HEADER_SEP_MBSTRING
4118: #undef MAIL_ASCIIZ_CHECK_MBSTRING
4119: #undef APPEND_ONE_CHAR
4120: #undef SEPARATE_SMART_STR
4121: #undef PHP_MBSTR_MAIL_MIME_HEADER1
4122: #undef PHP_MBSTR_MAIL_MIME_HEADER2
4123: #undef PHP_MBSTR_MAIL_MIME_HEADER3
4124: #undef PHP_MBSTR_MAIL_MIME_HEADER4
4125: /* }}} */
4126:
4127: /* {{{ proto mixed mb_get_info([string type])
4128: Returns the current settings of mbstring */
4129: PHP_FUNCTION(mb_get_info)
4130: {
4131: char *typ = NULL;
4132: int typ_len, n;
4133: char *name;
4134: const struct mb_overload_def *over_func;
4135: zval *row1, *row2;
4136: const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4137: enum mbfl_no_encoding *entry;
4138: #ifdef ZEND_MULTIBYTE
4139: zval *row3;
4140: #endif /* ZEND_MULTIBYTE */
4141:
4142: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4143: RETURN_FALSE;
4144: }
4145:
4146: if (!typ || !strcasecmp("all", typ)) {
4147: array_init(return_value);
4148: if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4149: add_assoc_string(return_value, "internal_encoding", name, 1);
4150: }
4151: if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4152: add_assoc_string(return_value, "http_input", name, 1);
4153: }
4154: if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4155: add_assoc_string(return_value, "http_output", name, 1);
4156: }
4157: if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4158: add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4159: }
4160: add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4161: if (MBSTRG(func_overload)){
4162: over_func = &(mb_ovld[0]);
4163: MAKE_STD_ZVAL(row1);
4164: array_init(row1);
4165: while (over_func->type > 0) {
4166: if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4167: add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4168: }
4169: over_func++;
4170: }
4171: add_assoc_zval(return_value, "func_overload_list", row1);
4172: } else {
4173: add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4174: }
4175: if (lang != NULL) {
4176: if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4177: add_assoc_string(return_value, "mail_charset", name, 1);
4178: }
4179: if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4180: add_assoc_string(return_value, "mail_header_encoding", name, 1);
4181: }
4182: if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4183: add_assoc_string(return_value, "mail_body_encoding", name, 1);
4184: }
4185: }
4186: add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4187: if (MBSTRG(encoding_translation)) {
4188: add_assoc_string(return_value, "encoding_translation", "On", 1);
4189: } else {
4190: add_assoc_string(return_value, "encoding_translation", "Off", 1);
4191: }
4192: if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4193: add_assoc_string(return_value, "language", name, 1);
4194: }
4195: n = MBSTRG(current_detect_order_list_size);
4196: entry = MBSTRG(current_detect_order_list);
4197: if(n > 0) {
4198: MAKE_STD_ZVAL(row2);
4199: array_init(row2);
4200: while (n > 0) {
4201: if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4202: add_next_index_string(row2, name, 1);
4203: }
4204: entry++;
4205: n--;
4206: }
4207: add_assoc_zval(return_value, "detect_order", row2);
4208: }
4209: if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4210: add_assoc_string(return_value, "substitute_character", "none", 1);
4211: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4212: add_assoc_string(return_value, "substitute_character", "long", 1);
4213: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4214: add_assoc_string(return_value, "substitute_character", "entity", 1);
4215: } else {
4216: add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4217: }
4218: if (MBSTRG(strict_detection)) {
4219: add_assoc_string(return_value, "strict_detection", "On", 1);
4220: } else {
4221: add_assoc_string(return_value, "strict_detection", "Off", 1);
4222: }
4223: #ifdef ZEND_MULTIBYTE
4224: entry = MBSTRG(script_encoding_list);
4225: n = MBSTRG(script_encoding_list_size);
4226: if(n > 0) {
4227: MAKE_STD_ZVAL(row3);
4228: array_init(row3);
4229: while (n > 0) {
4230: if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4231: add_next_index_string(row3, name, 1);
4232: }
4233: entry++;
4234: n--;
4235: }
4236: add_assoc_zval(return_value, "script_encoding", row3);
4237: }
4238: #endif /* ZEND_MULTIBYTE */
4239: } else if (!strcasecmp("internal_encoding", typ)) {
4240: if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4241: RETVAL_STRING(name, 1);
4242: }
4243: } else if (!strcasecmp("http_input", typ)) {
4244: if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4245: RETVAL_STRING(name, 1);
4246: }
4247: } else if (!strcasecmp("http_output", typ)) {
4248: if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4249: RETVAL_STRING(name, 1);
4250: }
4251: } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4252: if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4253: RETVAL_STRING(name, 1);
4254: }
4255: } else if (!strcasecmp("func_overload", typ)) {
4256: RETVAL_LONG(MBSTRG(func_overload));
4257: } else if (!strcasecmp("func_overload_list", typ)) {
4258: if (MBSTRG(func_overload)){
4259: over_func = &(mb_ovld[0]);
4260: array_init(return_value);
4261: while (over_func->type > 0) {
4262: if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4263: add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4264: }
4265: over_func++;
4266: }
4267: } else {
4268: RETVAL_STRING("no overload", 1);
4269: }
4270: } else if (!strcasecmp("mail_charset", typ)) {
4271: if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4272: RETVAL_STRING(name, 1);
4273: }
4274: } else if (!strcasecmp("mail_header_encoding", typ)) {
4275: if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4276: RETVAL_STRING(name, 1);
4277: }
4278: } else if (!strcasecmp("mail_body_encoding", typ)) {
4279: if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4280: RETVAL_STRING(name, 1);
4281: }
4282: } else if (!strcasecmp("illegal_chars", typ)) {
4283: RETVAL_LONG(MBSTRG(illegalchars));
4284: } else if (!strcasecmp("encoding_translation", typ)) {
4285: if (MBSTRG(encoding_translation)) {
4286: RETVAL_STRING("On", 1);
4287: } else {
4288: RETVAL_STRING("Off", 1);
4289: }
4290: } else if (!strcasecmp("language", typ)) {
4291: if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4292: RETVAL_STRING(name, 1);
4293: }
4294: } else if (!strcasecmp("detect_order", typ)) {
4295: n = MBSTRG(current_detect_order_list_size);
4296: entry = MBSTRG(current_detect_order_list);
4297: if(n > 0) {
4298: array_init(return_value);
4299: while (n > 0) {
4300: name = (char *)mbfl_no_encoding2name(*entry);
4301: if (name) {
4302: add_next_index_string(return_value, name, 1);
4303: }
4304: entry++;
4305: n--;
4306: }
4307: }
4308: } else if (!strcasecmp("substitute_character", typ)) {
4309: if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4310: RETVAL_STRING("none", 1);
4311: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4312: RETVAL_STRING("long", 1);
4313: } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4314: RETVAL_STRING("entity", 1);
4315: } else {
4316: RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4317: }
4318: } else if (!strcasecmp("strict_detection", typ)) {
4319: if (MBSTRG(strict_detection)) {
4320: RETVAL_STRING("On", 1);
4321: } else {
4322: RETVAL_STRING("Off", 1);
4323: }
4324: } else {
4325: #ifdef ZEND_MULTIBYTE
4326: if (!strcasecmp("script_encoding", typ)) {
4327: entry = MBSTRG(script_encoding_list);
4328: n = MBSTRG(script_encoding_list_size);
4329: if(n > 0) {
4330: array_init(return_value);
4331: while (n > 0) {
4332: name = (char *)mbfl_no_encoding2name(*entry);
4333: if (name) {
4334: add_next_index_string(return_value, name, 1);
4335: }
4336: entry++;
4337: n--;
4338: }
4339: }
4340: return;
4341: }
4342: #endif /* ZEND_MULTIBYTE */
4343: RETURN_FALSE;
4344: }
4345: }
4346: /* }}} */
4347:
4348: /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4349: Check if the string is valid for the specified encoding */
4350: PHP_FUNCTION(mb_check_encoding)
4351: {
4352: char *var = NULL;
4353: int var_len;
4354: char *enc = NULL;
4355: int enc_len;
4356: mbfl_buffer_converter *convd;
4357: enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
4358: mbfl_string string, result, *ret = NULL;
4359: long illegalchars = 0;
4360:
4361: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4362: RETURN_FALSE;
4363: }
4364:
4365: if (var == NULL) {
4366: RETURN_BOOL(MBSTRG(illegalchars) == 0);
4367: }
4368:
4369: if (enc != NULL) {
4370: no_encoding = mbfl_name2no_encoding(enc);
4371: if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
4372: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4373: RETURN_FALSE;
4374: }
4375: }
4376:
4377: convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
4378: if (convd == NULL) {
4379: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4380: RETURN_FALSE;
4381: }
4382: mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4383: mbfl_buffer_converter_illegal_substchar(convd, 0);
4384:
4385: /* initialize string */
4386: mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
4387: mbfl_string_init(&result);
4388:
4389: string.val = (unsigned char *)var;
4390: string.len = var_len;
4391: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4392: illegalchars = mbfl_buffer_illegalchars(convd);
4393: mbfl_buffer_converter_delete(convd);
4394:
4395: RETVAL_FALSE;
4396: if (ret != NULL) {
4397: if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4398: RETVAL_TRUE;
4399: }
4400: mbfl_string_clear(&result);
4401: }
4402: }
4403: /* }}} */
4404:
4405: /* {{{ MBSTRING_API int php_mb_encoding_translation() */
4406: MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
4407: {
4408: return MBSTRG(encoding_translation);
4409: }
4410: /* }}} */
4411:
4412: /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4413: MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4414: {
4415: if (enc != NULL) {
4416: if (enc->flag & MBFL_ENCTYPE_MBCS) {
4417: if (enc->mblen_table != NULL) {
4418: if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4419: }
4420: } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4421: return 2;
4422: } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4423: return 4;
4424: }
4425: }
4426: return 1;
4427: }
4428: /* }}} */
4429:
4430: /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4431: MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4432: {
4433: return php_mb_mbchar_bytes_ex(s,
4434: mbfl_no2encoding(MBSTRG(internal_encoding)));
4435: }
4436: /* }}} */
4437:
4438: /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4439: MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4440: {
4441: register const char *p = s;
4442: char *last=NULL;
4443:
4444: if (nbytes == (size_t)-1) {
4445: size_t nb = 0;
4446:
4447: while (*p != '\0') {
4448: if (nb == 0) {
4449: if ((unsigned char)*p == (unsigned char)c) {
4450: last = (char *)p;
4451: }
4452: nb = php_mb_mbchar_bytes_ex(p, enc);
4453: if (nb == 0) {
4454: return NULL; /* something is going wrong! */
4455: }
4456: }
4457: --nb;
4458: ++p;
4459: }
4460: } else {
4461: register size_t bcnt = nbytes;
4462: register size_t nbytes_char;
4463: while (bcnt > 0) {
4464: if ((unsigned char)*p == (unsigned char)c) {
4465: last = (char *)p;
4466: }
4467: nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4468: if (bcnt < nbytes_char) {
4469: return NULL;
4470: }
4471: p += nbytes_char;
4472: bcnt -= nbytes_char;
4473: }
4474: }
4475: return last;
4476: }
4477: /* }}} */
4478:
4479: /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4480: MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4481: {
4482: return php_mb_safe_strrchr_ex(s, c, nbytes,
4483: mbfl_no2encoding(MBSTRG(internal_encoding)));
4484: }
4485: /* }}} */
4486:
4487: /* {{{ MBSTRING_API char *php_mb_strrchr() */
4488: MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
4489: {
4490: return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
4491: }
4492: /* }}} */
4493:
4494: /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
4495: MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
4496: {
4497:
4498: if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
4499: return php_mb_mbchar_bytes_ex(s,
4500: mbfl_no2encoding(MBSTRG(http_input_identify)));
4501: } else {
4502: return php_mb_mbchar_bytes_ex(s,
4503: mbfl_no2encoding(MBSTRG(internal_encoding)));
4504: }
4505: }
4506: /* }}} */
4507:
4508: /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
4509: MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4510: {
4511: int i;
4512: mbfl_string string, result, *ret = NULL;
4513: enum mbfl_no_encoding from_encoding, to_encoding;
4514: mbfl_buffer_converter *convd;
4515:
4516: if (encoding_to) {
4517: /* new encoding */
4518: to_encoding = mbfl_name2no_encoding(encoding_to);
4519: if (to_encoding == mbfl_no_encoding_invalid) {
4520: return -1;
4521: }
4522: } else {
4523: to_encoding = MBSTRG(current_internal_encoding);
4524: }
4525: if (encoding_from) {
4526: /* old encoding */
4527: from_encoding = mbfl_name2no_encoding(encoding_from);
4528: if (from_encoding == mbfl_no_encoding_invalid) {
4529: return -1;
4530: }
4531: } else {
4532: from_encoding = MBSTRG(http_input_identify);
4533: }
4534:
4535: if (from_encoding == mbfl_no_encoding_pass) {
4536: return 0;
4537: }
4538:
4539: /* initialize string */
4540: mbfl_string_init(&string);
4541: mbfl_string_init(&result);
4542: string.no_encoding = from_encoding;
4543: string.no_language = MBSTRG(language);
4544:
4545: for (i=0; i<num; i++){
4546: string.val = (unsigned char *)str[i];
4547: string.len = len[i];
4548:
4549: /* initialize converter */
4550: convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4551: if (convd == NULL) {
4552: return -1;
4553: }
4554: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4555: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4556:
4557: /* do it */
4558: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4559: if (ret != NULL) {
4560: efree(str[i]);
4561: str[i] = (char *)ret->val;
4562: len[i] = (int)ret->len;
4563: }
4564:
4565: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4566: mbfl_buffer_converter_delete(convd);
4567: }
4568:
4569: return ret ? 0 : -1;
4570: }
4571: /* }}} */
4572:
4573: /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
4574: */
4575: MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
4576: {
4577: mbfl_string string;
4578: enum mbfl_no_encoding *elist;
4579: enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
4580: mbfl_encoding_detector *identd = NULL;
4581:
4582: int size;
4583: enum mbfl_no_encoding *list;
4584:
4585: if (MBSTRG(http_input_list_size) == 1 &&
4586: MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
4587: MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
4588: return SUCCESS;
4589: }
4590:
4591: if (MBSTRG(http_input_list_size) == 1 &&
4592: MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
4593: mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
4594: MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
4595: return SUCCESS;
4596: }
4597:
4598: if (arg_list && strlen(arg_list)>0) {
4599: /* make encoding list */
4600: list = NULL;
4601: size = 0;
4602: php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4603:
4604: if (size > 0 && list != NULL) {
4605: elist = list;
4606: } else {
4607: elist = MBSTRG(current_detect_order_list);
4608: size = MBSTRG(current_detect_order_list_size);
4609: if (size <= 0){
4610: elist = MBSTRG(default_detect_order_list);
4611: size = MBSTRG(default_detect_order_list_size);
4612: }
4613: }
4614: } else {
4615: elist = MBSTRG(current_detect_order_list);
4616: size = MBSTRG(current_detect_order_list_size);
4617: if (size <= 0){
4618: elist = MBSTRG(default_detect_order_list);
4619: size = MBSTRG(default_detect_order_list_size);
4620: }
4621: }
4622:
4623: mbfl_string_init(&string);
4624: string.no_language = MBSTRG(language);
4625:
4626: identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
4627:
4628: if (identd) {
4629: int n = 0;
4630: while(n < num){
4631: string.val = (unsigned char *)arg_string[n];
4632: string.len = arg_length[n];
4633: if (mbfl_encoding_detector_feed(identd, &string)) {
4634: break;
4635: }
4636: n++;
4637: }
4638: encoding = mbfl_encoding_detector_judge(identd);
4639: mbfl_encoding_detector_delete(identd);
4640: }
4641:
4642: if (encoding != mbfl_no_encoding_invalid) {
4643: MBSTRG(http_input_identify) = encoding;
4644: return SUCCESS;
4645: } else {
4646: return FAILURE;
4647: }
4648: }
4649: /* }}} */
4650:
4651: /* {{{ MBSTRING_API int php_mb_stripos()
4652: */
4653: MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4654: {
4655: int n;
4656: mbfl_string haystack, needle;
4657: n = -1;
4658:
4659: mbfl_string_init(&haystack);
4660: mbfl_string_init(&needle);
4661: haystack.no_language = MBSTRG(language);
4662: haystack.no_encoding = MBSTRG(current_internal_encoding);
4663: needle.no_language = MBSTRG(language);
4664: needle.no_encoding = MBSTRG(current_internal_encoding);
4665:
4666: do {
4667: size_t len = 0;
4668: haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4669: haystack.len = len;
4670:
4671: if (!haystack.val) {
4672: break;
4673: }
4674:
4675: if (haystack.len <= 0) {
4676: break;
4677: }
4678:
4679: needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4680: needle.len = len;
4681:
4682: if (!needle.val) {
4683: break;
4684: }
4685:
4686: if (needle.len <= 0) {
4687: break;
4688: }
4689:
4690: haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4691: if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4692: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4693: break;
4694: }
4695:
4696: {
4697: int haystack_char_len = mbfl_strlen(&haystack);
4698:
4699: if (mode) {
4700: if ((offset > 0 && offset > haystack_char_len) ||
4701: (offset < 0 && -offset > haystack_char_len)) {
4702: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4703: break;
4704: }
4705: } else {
4706: if (offset < 0 || offset > haystack_char_len) {
4707: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4708: break;
4709: }
4710: }
4711: }
4712:
4713: n = mbfl_strpos(&haystack, &needle, offset, mode);
4714: } while(0);
4715:
4716: if (haystack.val) {
4717: efree(haystack.val);
4718: }
4719:
4720: if (needle.val) {
4721: efree(needle.val);
4722: }
4723:
4724: return n;
4725: }
4726: /* }}} */
4727:
4728: #ifdef ZEND_MULTIBYTE
4729: /* {{{ php_mb_set_zend_encoding() */
4730: static int php_mb_set_zend_encoding(TSRMLS_D)
4731: {
4732: /* 'd better use mbfl_memory_device? */
4733: char *name, *list = NULL;
4734: int n, *entry, list_size = 0;
4735: zend_encoding_detector encoding_detector;
4736: zend_encoding_converter encoding_converter;
4737: zend_encoding_oddlen encoding_oddlen;
4738:
4739: /* notify script encoding to Zend Engine */
4740: entry = MBSTRG(script_encoding_list);
4741: n = MBSTRG(script_encoding_list_size);
4742: while (n > 0) {
4743: name = (char *)mbfl_no_encoding2name(*entry);
4744: if (name) {
4745: list_size += strlen(name) + 1;
4746: if (!list) {
4747: list = (char*)emalloc(list_size);
4748: *list = '\0';
4749: } else {
4750: list = (char*)erealloc(list, list_size);
4751: strcat(list, ",");
4752: }
4753: strcat(list, name);
4754: }
4755: entry++;
4756: n--;
4757: }
4758: zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
4759: if (list) {
4760: efree(list);
4761: }
4762: encoding_detector = php_mb_encoding_detector;
4763: encoding_converter = php_mb_encoding_converter;
4764: encoding_oddlen = php_mb_oddlen;
4765:
4766: /* TODO: make independent from mbstring.encoding_translation? */
4767: if (MBSTRG(encoding_translation)) {
4768: /* notify internal encoding to Zend Engine */
4769: name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
4770: zend_multibyte_set_internal_encoding(name TSRMLS_CC);
4771: }
4772:
4773: zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
4774:
4775: return 0;
4776: }
4777: /* }}} */
4778:
4779: /* {{{ char *php_mb_encoding_detector()
4780: * Interface for Zend Engine
4781: */
4782: static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
4783: {
4784: mbfl_string string;
4785: const char *ret;
4786: enum mbfl_no_encoding *elist;
4787: int size, *list;
4788:
4789: /* make encoding list */
4790: list = NULL;
4791: size = 0;
4792: php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4793: if (size <= 0) {
4794: return NULL;
4795: }
4796: if (size > 0 && list != NULL) {
4797: elist = list;
4798: } else {
4799: elist = MBSTRG(current_detect_order_list);
4800: size = MBSTRG(current_detect_order_list_size);
4801: }
4802:
4803: mbfl_string_init(&string);
4804: string.no_language = MBSTRG(language);
4805: string.val = (unsigned char *)arg_string;
4806: string.len = arg_length;
4807: ret = mbfl_identify_encoding_name(&string, elist, size, 0);
4808: if (list != NULL) {
4809: efree((void *)list);
4810: }
4811: if (ret != NULL) {
4812: return estrdup(ret);
4813: } else {
4814: return NULL;
4815: }
4816: }
4817: /* }}} */
4818:
4819: /* {{{ int php_mb_encoding_converter() */
4820: static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4821: {
4822: mbfl_string string, result, *ret;
4823: enum mbfl_no_encoding from_encoding, to_encoding;
4824: mbfl_buffer_converter *convd;
4825:
4826: /* new encoding */
4827: to_encoding = mbfl_name2no_encoding(encoding_to);
4828: if (to_encoding == mbfl_no_encoding_invalid) {
4829: return -1;
4830: }
4831: /* old encoding */
4832: from_encoding = mbfl_name2no_encoding(encoding_from);
4833: if (from_encoding == mbfl_no_encoding_invalid) {
4834: return -1;
4835: }
4836: /* initialize string */
4837: mbfl_string_init(&string);
4838: mbfl_string_init(&result);
4839: string.no_encoding = from_encoding;
4840: string.no_language = MBSTRG(language);
4841: string.val = (unsigned char*)from;
4842: string.len = from_length;
4843:
4844: /* initialize converter */
4845: convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4846: if (convd == NULL) {
4847: return -1;
4848: }
4849: mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4850: mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4851:
4852: /* do it */
4853: ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4854: if (ret != NULL) {
4855: *to = ret->val;
4856: *to_length = ret->len;
4857: }
4858:
4859: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4860: mbfl_buffer_converter_delete(convd);
4861:
4862: return ret ? 0 : -1;
4863: }
4864: /* }}} */
4865:
4866: /* {{{ int php_mb_oddlen()
4867: * returns number of odd (e.g. appears only first byte of multibyte
4868: * character) chars
4869: */
4870: static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
4871: {
4872: mbfl_string mb_string;
4873:
4874: mbfl_string_init(&mb_string);
4875: mb_string.no_language = MBSTRG(language);
4876: mb_string.no_encoding = mbfl_name2no_encoding(encoding);
4877: mb_string.val = (unsigned char *)string;
4878: mb_string.len = length;
4879:
4880: if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
4881: return 0;
4882: }
4883: return mbfl_oddlen(&mb_string);
4884: }
4885: /* }}} */
4886: #endif /* ZEND_MULTIBYTE */
4887:
4888: #endif /* HAVE_MBSTRING */
4889:
4890: /*
4891: * Local variables:
4892: * tab-width: 4
4893: * c-basic-offset: 4
4894: * End:
4895: * vim600: fdm=marker
4896: * vim: noet sw=4 ts=4
4897: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>