![]() ![]() | ![]() |
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
1.1.1.4 ! misho 5: | Copyright (c) 1997-2014 The PHP Group |
1.1 misho 6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 misho 19: /* $Id$ */
1.1 misho 20:
21:
22: #ifdef HAVE_CONFIG_H
23: #include "config.h"
24: #endif
25:
26: #include "php.h"
27: #include "php_ini.h"
28:
29: #if HAVE_MBREGEX
30:
31: #include "ext/standard/php_smart_str.h"
32: #include "ext/standard/info.h"
33: #include "php_mbregex.h"
34: #include "mbstring.h"
35:
36: #include "php_onig_compat.h" /* must come prior to the oniguruma header */
37: #include <oniguruma.h>
38: #undef UChar
39:
40: ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41:
/* State shared by the mbregex functions in this file; fields are reached
 * through the MBREX() accessor defined below the struct. */
42: struct _zend_mb_regex_globals {
43: OnigEncoding default_mbctype; /* copied back into current_mbctype at request shutdown */
44: OnigEncoding current_mbctype; /* encoding handed to the pattern compiler */
45: HashTable ht_rc; /* cache of compiled regexes, keyed by pattern text */
46: zval *search_str; /* released at request shutdown; presumably the mb_ereg_search subject -- TODO confirm */
47: zval *search_str_val; /* not referenced in the visible part of this file */
48: unsigned int search_pos; /* reset to 0 at request shutdown */
49: php_mb_regex_t *search_re; /* NOTE(review): looks like the regex of the current search -- confirm */
50: OnigRegion *search_regs; /* freed with onig_region_free() at request shutdown */
51: OnigOptionType regex_default_options; /* options applied when a caller passes no option string */
52: OnigSyntaxType *regex_default_syntax; /* syntax applied when a caller passes no option string */
53: };
54:
/* Shorthand for field `g` of the regex globals hanging off the mbstring globals. */
55: #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56:
57: /* {{{ static void php_mb_regex_free_cache() */
58: static void php_mb_regex_free_cache(php_mb_regex_t **pre)
59: {
60: onig_free(*pre);
61: }
62: /* }}} */
63:
64: /* {{{ _php_mb_regex_globals_ctor */
65: static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
66: {
67: pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
68: pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
69: zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
70: pglobals->search_str = (zval*) NULL;
71: pglobals->search_re = (php_mb_regex_t*)NULL;
72: pglobals->search_pos = 0;
73: pglobals->search_regs = (OnigRegion*)NULL;
74: pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
75: pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
76: return SUCCESS;
77: }
78: /* }}} */
79:
80: /* {{{ _php_mb_regex_globals_dtor */
81: static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
82: {
83: zend_hash_destroy(&pglobals->ht_rc);
84: }
85: /* }}} */
86:
87: /* {{{ php_mb_regex_globals_alloc */
88: zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
89: {
90: zend_mb_regex_globals *pglobals = pemalloc(
91: sizeof(zend_mb_regex_globals), 1);
92: if (!pglobals) {
93: return NULL;
94: }
95: if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
96: pefree(pglobals, 1);
97: return NULL;
98: }
99: return pglobals;
100: }
101: /* }}} */
102:
103: /* {{{ php_mb_regex_globals_free */
104: void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
105: {
106: if (!pglobals) {
107: return;
108: }
109: _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
110: pefree(pglobals, 1);
111: }
112: /* }}} */
113:
114: /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
115: PHP_MINIT_FUNCTION(mb_regex)
116: {
117: onig_init();
118: return SUCCESS;
119: }
120: /* }}} */
121:
122: /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
123: PHP_MSHUTDOWN_FUNCTION(mb_regex)
124: {
125: onig_end();
126: return SUCCESS;
127: }
128: /* }}} */
129:
130: /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
131: PHP_RINIT_FUNCTION(mb_regex)
132: {
133: return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
134: }
135: /* }}} */
136:
137: /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
138: PHP_RSHUTDOWN_FUNCTION(mb_regex)
139: {
140: MBREX(current_mbctype) = MBREX(default_mbctype);
141:
142: if (MBREX(search_str) != NULL) {
143: zval_ptr_dtor(&MBREX(search_str));
144: MBREX(search_str) = (zval *)NULL;
145: }
146: MBREX(search_pos) = 0;
147:
148: if (MBREX(search_regs) != NULL) {
149: onig_region_free(MBREX(search_regs), 1);
150: MBREX(search_regs) = (OnigRegion *)NULL;
151: }
152: zend_hash_clean(&MBREX(ht_rc));
153:
154: return SUCCESS;
155: }
156: /* }}} */
157:
158: /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
159: PHP_MINFO_FUNCTION(mb_regex)
160: {
161: char buf[32];
162: php_info_print_table_start();
163: php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
164: snprintf(buf, sizeof(buf), "%d.%d.%d",
165: ONIGURUMA_VERSION_MAJOR,
166: ONIGURUMA_VERSION_MINOR,
167: ONIGURUMA_VERSION_TEENY);
168: #ifdef PHP_ONIG_BUNDLED
169: #ifdef USE_COMBINATION_EXPLOSION_CHECK
170: php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
171: #else /* USE_COMBINATION_EXPLOSION_CHECK */
172: php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
173: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
174: #endif /* PHP_BUNDLED_ONIG */
175: php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
176: php_info_print_table_end();
177: }
178: /* }}} */
179:
180: /*
181: * encoding name resolver
182: */
183:
184: /* {{{ encoding name map */
/* One row of the encoding-name table: a packed alias list plus the Oniguruma
 * encoding those aliases resolve to. */
185: typedef struct _php_mb_regex_enc_name_map_t {
186: const char *names; /* aliases stored back to back, each NUL-terminated; an empty string ends the list */
187: OnigEncoding code; /* encoding returned when any alias matches */
188: } php_mb_regex_enc_name_map_t;
189:
/*
 * Table mapping encoding aliases to Oniguruma encodings.
 *
 * Each `names` literal packs its aliases as consecutive NUL-terminated
 * strings; the literal's own terminator supplies the empty string that ends
 * the list.  Read as a plain C string, `names` is therefore the first alias,
 * which is what _php_mb_regex_mbctype2name() hands back.  An entry is only
 * compiled in when its ONIG_ENCODING_* macro is defined, and the
 * { NULL, ONIG_ENCODING_UNDEF } row terminates the table.
 */
190: php_mb_regex_enc_name_map_t enc_name_map[] = {
191: #ifdef ONIG_ENCODING_EUC_JP
192: {
193: "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
194: ONIG_ENCODING_EUC_JP
195: },
196: #endif
197: #ifdef ONIG_ENCODING_UTF8
198: {
199: "UTF-8\0UTF8\0",
200: ONIG_ENCODING_UTF8
201: },
202: #endif
203: #ifdef ONIG_ENCODING_UTF16_BE
204: {
205: "UTF-16\0UTF-16BE\0",
206: ONIG_ENCODING_UTF16_BE
207: },
208: #endif
209: #ifdef ONIG_ENCODING_UTF16_LE
210: {
211: "UTF-16LE\0",
212: ONIG_ENCODING_UTF16_LE
213: },
214: #endif
215: #ifdef ONIG_ENCODING_UTF32_BE
216: {
217: "UCS-4\0UTF-32\0UTF-32BE\0",
218: ONIG_ENCODING_UTF32_BE
219: },
220: #endif
221: #ifdef ONIG_ENCODING_UTF32_LE
222: {
223: "UCS-4LE\0UTF-32LE\0",
224: ONIG_ENCODING_UTF32_LE
225: },
226: #endif
227: #ifdef ONIG_ENCODING_SJIS
228: {
229: "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
230: ONIG_ENCODING_SJIS
231: },
232: #endif
233: #ifdef ONIG_ENCODING_BIG5
234: {
235: "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
236: ONIG_ENCODING_BIG5
237: },
238: #endif
239: #ifdef ONIG_ENCODING_EUC_CN
240: {
241: "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
242: ONIG_ENCODING_EUC_CN
243: },
244: #endif
245: #ifdef ONIG_ENCODING_EUC_TW
246: {
247: "EUC-TW\0EUCTW\0EUC_TW\0",
248: ONIG_ENCODING_EUC_TW
249: },
250: #endif
251: #ifdef ONIG_ENCODING_EUC_KR
252: {
253: "EUC-KR\0EUCKR\0EUC_KR\0",
254: ONIG_ENCODING_EUC_KR
255: },
256: #endif
257: #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
258: {
259: "KOI8\0KOI-8\0",
260: ONIG_ENCODING_KOI8
261: },
262: #endif
263: #ifdef ONIG_ENCODING_KOI8_R
264: {
265: "KOI8R\0KOI8-R\0KOI-8R\0",
266: ONIG_ENCODING_KOI8_R
267: },
268: #endif
269: #ifdef ONIG_ENCODING_ISO_8859_1
270: {
271: "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
272: ONIG_ENCODING_ISO_8859_1
273: },
274: #endif
275: #ifdef ONIG_ENCODING_ISO_8859_2
276: {
277: "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
278: ONIG_ENCODING_ISO_8859_2
279: },
280: #endif
281: #ifdef ONIG_ENCODING_ISO_8859_3
282: {
283: "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
284: ONIG_ENCODING_ISO_8859_3
285: },
286: #endif
287: #ifdef ONIG_ENCODING_ISO_8859_4
288: {
289: "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
290: ONIG_ENCODING_ISO_8859_4
291: },
292: #endif
293: #ifdef ONIG_ENCODING_ISO_8859_5
294: {
295: "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
296: ONIG_ENCODING_ISO_8859_5
297: },
298: #endif
299: #ifdef ONIG_ENCODING_ISO_8859_6
300: {
301: "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
302: ONIG_ENCODING_ISO_8859_6
303: },
304: #endif
305: #ifdef ONIG_ENCODING_ISO_8859_7
306: {
307: "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
308: ONIG_ENCODING_ISO_8859_7
309: },
310: #endif
311: #ifdef ONIG_ENCODING_ISO_8859_8
312: {
313: "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
314: ONIG_ENCODING_ISO_8859_8
315: },
316: #endif
317: #ifdef ONIG_ENCODING_ISO_8859_9
318: {
319: "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
320: ONIG_ENCODING_ISO_8859_9
321: },
322: #endif
323: #ifdef ONIG_ENCODING_ISO_8859_10
324: {
325: "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
326: ONIG_ENCODING_ISO_8859_10
327: },
328: #endif
329: #ifdef ONIG_ENCODING_ISO_8859_11
330: {
331: "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
332: ONIG_ENCODING_ISO_8859_11
333: },
334: #endif
335: #ifdef ONIG_ENCODING_ISO_8859_13
336: {
337: "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
338: ONIG_ENCODING_ISO_8859_13
339: },
340: #endif
341: #ifdef ONIG_ENCODING_ISO_8859_14
342: {
343: "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
344: ONIG_ENCODING_ISO_8859_14
345: },
346: #endif
347: #ifdef ONIG_ENCODING_ISO_8859_15
348: {
349: "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
350: ONIG_ENCODING_ISO_8859_15
351: },
352: #endif
353: #ifdef ONIG_ENCODING_ISO_8859_16
354: {
355: "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
356: ONIG_ENCODING_ISO_8859_16
357: },
358: #endif
359: #ifdef ONIG_ENCODING_ASCII
360: {
361: "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
362: ONIG_ENCODING_ASCII
363: },
364: #endif
365: { NULL, ONIG_ENCODING_UNDEF }
366: };
367: /* }}} */
368:
369: /* {{{ php_mb_regex_name2mbctype */
370: static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
371: {
372: const char *p;
373: php_mb_regex_enc_name_map_t *mapping;
374:
1.1.1.2 misho 375: if (pname == NULL || !*pname) {
1.1 misho 376: return ONIG_ENCODING_UNDEF;
377: }
378:
379: for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
380: for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
381: if (strcasecmp(p, pname) == 0) {
382: return mapping->code;
383: }
384: }
385: }
386:
387: return ONIG_ENCODING_UNDEF;
388: }
389: /* }}} */
390:
391: /* {{{ php_mb_regex_mbctype2name */
392: static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
393: {
394: php_mb_regex_enc_name_map_t *mapping;
395:
396: for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
397: if (mapping->code == mbctype) {
398: return mapping->names;
399: }
400: }
401:
402: return NULL;
403: }
404: /* }}} */
405:
406: /* {{{ php_mb_regex_set_mbctype */
407: int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
408: {
409: OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
410: if (mbctype == ONIG_ENCODING_UNDEF) {
411: return FAILURE;
412: }
413: MBREX(current_mbctype) = mbctype;
414: return SUCCESS;
415: }
416: /* }}} */
417:
418: /* {{{ php_mb_regex_set_default_mbctype */
419: int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
420: {
421: OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
422: if (mbctype == ONIG_ENCODING_UNDEF) {
423: return FAILURE;
424: }
425: MBREX(default_mbctype) = mbctype;
426: return SUCCESS;
427: }
428: /* }}} */
429:
430: /* {{{ php_mb_regex_get_mbctype */
431: const char *php_mb_regex_get_mbctype(TSRMLS_D)
432: {
433: return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
434: }
435: /* }}} */
436:
437: /* {{{ php_mb_regex_get_default_mbctype */
438: const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
439: {
440: return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
441: }
442: /* }}} */
443:
444: /*
445: * regex cache
446: */
447: /* {{{ php_mbregex_compile_pattern */
448: static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
449: {
450: int err_code = 0;
451: int found = 0;
452: php_mb_regex_t *retval = NULL, **rc = NULL;
453: OnigErrorInfo err_info;
454: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
455:
456: found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
457: if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
458: if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
459: onig_error_code_to_str(err_str, err_code, err_info);
460: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
461: retval = NULL;
462: goto out;
463: }
464: zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
465: } else if (found == SUCCESS) {
466: retval = *rc;
467: }
468: out:
469: return retval;
470: }
471: /* }}} */
472:
473: /* {{{ _php_mb_regex_get_option_string */
474: static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
475: {
476: size_t len_left = len;
477: size_t len_req = 0;
478: char *p = str;
479: char c;
480:
481: if ((option & ONIG_OPTION_IGNORECASE) != 0) {
482: if (len_left > 0) {
483: --len_left;
484: *(p++) = 'i';
485: }
486: ++len_req;
487: }
488:
489: if ((option & ONIG_OPTION_EXTEND) != 0) {
490: if (len_left > 0) {
491: --len_left;
492: *(p++) = 'x';
493: }
494: ++len_req;
495: }
496:
497: if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
498: (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
499: if (len_left > 0) {
500: --len_left;
501: *(p++) = 'p';
502: }
503: ++len_req;
504: } else {
505: if ((option & ONIG_OPTION_MULTILINE) != 0) {
506: if (len_left > 0) {
507: --len_left;
508: *(p++) = 'm';
509: }
510: ++len_req;
511: }
512:
513: if ((option & ONIG_OPTION_SINGLELINE) != 0) {
514: if (len_left > 0) {
515: --len_left;
516: *(p++) = 's';
517: }
518: ++len_req;
519: }
520: }
521: if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
522: if (len_left > 0) {
523: --len_left;
524: *(p++) = 'l';
525: }
526: ++len_req;
527: }
528: if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
529: if (len_left > 0) {
530: --len_left;
531: *(p++) = 'n';
532: }
533: ++len_req;
534: }
535:
536: c = 0;
537:
538: if (syntax == ONIG_SYNTAX_JAVA) {
539: c = 'j';
540: } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
541: c = 'u';
542: } else if (syntax == ONIG_SYNTAX_GREP) {
543: c = 'g';
544: } else if (syntax == ONIG_SYNTAX_EMACS) {
545: c = 'c';
546: } else if (syntax == ONIG_SYNTAX_RUBY) {
547: c = 'r';
548: } else if (syntax == ONIG_SYNTAX_PERL) {
549: c = 'z';
550: } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
551: c = 'b';
552: } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
553: c = 'd';
554: }
555:
556: if (c != 0) {
557: if (len_left > 0) {
558: --len_left;
559: *(p++) = c;
560: }
561: ++len_req;
562: }
563:
564:
565: if (len_left > 0) {
566: --len_left;
567: *(p++) = '\0';
568: }
569: ++len_req;
570: if (len < len_req) {
571: return len_req;
572: }
573:
574: return 0;
575: }
576: /* }}} */
577:
578: /* {{{ _php_mb_regex_init_options */
579: static void
580: _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
581: {
582: int n;
583: char c;
584: int optm = 0;
585:
586: *syntax = ONIG_SYNTAX_RUBY;
587:
588: if (parg != NULL) {
589: n = 0;
590: while(n < narg) {
591: c = parg[n++];
592: switch (c) {
593: case 'i':
594: optm |= ONIG_OPTION_IGNORECASE;
595: break;
596: case 'x':
597: optm |= ONIG_OPTION_EXTEND;
598: break;
599: case 'm':
600: optm |= ONIG_OPTION_MULTILINE;
601: break;
602: case 's':
603: optm |= ONIG_OPTION_SINGLELINE;
604: break;
605: case 'p':
606: optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
607: break;
608: case 'l':
609: optm |= ONIG_OPTION_FIND_LONGEST;
610: break;
611: case 'n':
612: optm |= ONIG_OPTION_FIND_NOT_EMPTY;
613: break;
614: case 'j':
615: *syntax = ONIG_SYNTAX_JAVA;
616: break;
617: case 'u':
618: *syntax = ONIG_SYNTAX_GNU_REGEX;
619: break;
620: case 'g':
621: *syntax = ONIG_SYNTAX_GREP;
622: break;
623: case 'c':
624: *syntax = ONIG_SYNTAX_EMACS;
625: break;
626: case 'r':
627: *syntax = ONIG_SYNTAX_RUBY;
628: break;
629: case 'z':
630: *syntax = ONIG_SYNTAX_PERL;
631: break;
632: case 'b':
633: *syntax = ONIG_SYNTAX_POSIX_BASIC;
634: break;
635: case 'd':
636: *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
637: break;
638: case 'e':
639: if (eval != NULL) *eval = 1;
640: break;
641: default:
642: break;
643: }
644: }
645: if (option != NULL) *option|=optm;
646: }
647: }
648: /* }}} */
649:
650: /*
1.1.1.3 misho 651: * php functions
1.1 misho 652: */
653:
654: /* {{{ proto string mb_regex_encoding([string encoding])
655: Returns the current encoding for regex as a string. */
656: PHP_FUNCTION(mb_regex_encoding)
657: {
658: size_t argc = ZEND_NUM_ARGS();
659: char *encoding;
660: int encoding_len;
661: OnigEncoding mbctype;
662:
663: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
664: return;
665: }
666:
667: if (argc == 0) {
668: const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
669:
670: if (retval == NULL) {
671: RETURN_FALSE;
672: }
673:
674: RETURN_STRING((char *)retval, 1);
675: } else if (argc == 1) {
676: mbctype = _php_mb_regex_name2mbctype(encoding);
677:
678: if (mbctype == ONIG_ENCODING_UNDEF) {
679: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
680: RETURN_FALSE;
681: }
682:
683: MBREX(current_mbctype) = mbctype;
684: RETURN_TRUE;
685: }
686: }
687: /* }}} */
688:
689: /* {{{ _php_mb_regex_ereg_exec */
690: static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
691: {
692: zval **arg_pattern, *array;
693: char *string;
694: int string_len;
695: php_mb_regex_t *re;
696: OnigRegion *regs = NULL;
697: int i, match_len, beg, end;
698: OnigOptionType options;
699: char *str;
700:
701: array = NULL;
702:
703: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
704: RETURN_FALSE;
705: }
706:
707: options = MBREX(regex_default_options);
708: if (icase) {
709: options |= ONIG_OPTION_IGNORECASE;
710: }
711:
712: /* compile the regular expression from the supplied regex */
713: if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
714: /* we convert numbers to integers and treat them as a string */
715: if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
716: convert_to_long_ex(arg_pattern); /* get rid of decimal places */
717: }
718: convert_to_string_ex(arg_pattern);
719: /* don't bother doing an extended regex with just a number */
720: }
721:
722: if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
723: php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
724: RETVAL_FALSE;
725: goto out;
726: }
727:
728: re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
729: if (re == NULL) {
730: RETVAL_FALSE;
731: goto out;
732: }
733:
734: regs = onig_region_new();
735:
736: /* actually execute the regular expression */
737: if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
738: RETVAL_FALSE;
739: goto out;
740: }
741:
742: match_len = 1;
743: str = string;
744: if (array != NULL) {
745: match_len = regs->end[0] - regs->beg[0];
746: zval_dtor(array);
747: array_init(array);
748: for (i = 0; i < regs->num_regs; i++) {
749: beg = regs->beg[i];
750: end = regs->end[i];
751: if (beg >= 0 && beg < end && end <= string_len) {
752: add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
753: } else {
754: add_index_bool(array, i, 0);
755: }
756: }
757: }
758:
759: if (match_len == 0) {
760: match_len = 1;
761: }
762: RETVAL_LONG(match_len);
763: out:
764: if (regs != NULL) {
765: onig_region_free(regs, 1);
766: }
767: }
768: /* }}} */
769:
770: /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
771: Regular expression match for multibyte string */
772: PHP_FUNCTION(mb_ereg)
773: {
774: _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
775: }
776: /* }}} */
777:
778: /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
779: Case-insensitive regular expression match for multibyte string */
780: PHP_FUNCTION(mb_eregi)
781: {
782: _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
783: }
784: /* }}} */
785:
786: /* {{{ _php_mb_regex_ereg_replace_exec */
1.1.1.2 misho 787: static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
1.1 misho 788: {
789: zval **arg_pattern_zval;
790:
791: char *arg_pattern;
792: int arg_pattern_len;
793:
794: char *replace;
795: int replace_len;
796:
1.1.1.2 misho 797: zend_fcall_info arg_replace_fci;
798: zend_fcall_info_cache arg_replace_fci_cache;
799:
1.1 misho 800: char *string;
801: int string_len;
802:
803: char *p;
804: php_mb_regex_t *re;
805: OnigSyntaxType *syntax;
806: OnigRegion *regs = NULL;
807: smart_str out_buf = { 0 };
808: smart_str eval_buf = { 0 };
809: smart_str *pbuf;
810: int i, err, eval, n;
811: OnigUChar *pos;
812: OnigUChar *string_lim;
813: char *description = NULL;
814: char pat_buf[2];
815:
816: const mbfl_encoding *enc;
817:
818: {
819: const char *current_enc_name;
820: current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
821: if (current_enc_name == NULL ||
822: (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
823: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
824: RETURN_FALSE;
825: }
826: }
827: eval = 0;
828: {
829: char *option_str = NULL;
830: int option_str_len = 0;
831:
1.1.1.2 misho 832: if (!is_callable) {
833: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
834: &arg_pattern_zval,
835: &replace, &replace_len,
836: &string, &string_len,
837: &option_str, &option_str_len) == FAILURE) {
838: RETURN_FALSE;
839: }
840: } else {
841: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
842: &arg_pattern_zval,
843: &arg_replace_fci, &arg_replace_fci_cache,
844: &string, &string_len,
845: &option_str, &option_str_len) == FAILURE) {
846: RETURN_FALSE;
847: }
1.1 misho 848: }
849:
850: if (option_str != NULL) {
851: _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
852: } else {
853: options |= MBREX(regex_default_options);
854: syntax = MBREX(regex_default_syntax);
855: }
856: }
857: if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
858: arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
859: arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
860: } else {
861: /* FIXME: this code is not multibyte aware! */
862: convert_to_long_ex(arg_pattern_zval);
863: pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
864: pat_buf[1] = '\0';
865:
866: arg_pattern = pat_buf;
867: arg_pattern_len = 1;
868: }
869: /* create regex pattern buffer */
870: re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
871: if (re == NULL) {
872: RETURN_FALSE;
873: }
874:
1.1.1.2 misho 875: if (eval || is_callable) {
1.1 misho 876: pbuf = &eval_buf;
877: description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
878: } else {
879: pbuf = &out_buf;
880: description = NULL;
881: }
882:
1.1.1.2 misho 883: if (is_callable) {
884: if (eval) {
885: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
886: RETURN_FALSE;
887: }
888: }
889:
1.1 misho 890: /* do the actual work */
891: err = 0;
892: pos = (OnigUChar *)string;
893: string_lim = (OnigUChar*)(string + string_len);
894: regs = onig_region_new();
895: while (err >= 0) {
896: err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
897: if (err <= -2) {
898: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
899: onig_error_code_to_str(err_str, err);
900: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
901: break;
902: }
903: if (err >= 0) {
904: #if moriyoshi_0
905: if (regs->beg[0] == regs->end[0]) {
906: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
907: break;
908: }
909: #endif
910: /* copy the part of the string before the match */
911: smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
1.1.1.2 misho 912:
913: if (!is_callable) {
914: /* copy replacement and backrefs */
915: i = 0;
916: p = replace;
917: while (i < replace_len) {
918: int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
919: n = -1;
920: if ((replace_len - i) >= 2 && fwd == 1 &&
1.1 misho 921: p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
1.1.1.2 misho 922: n = p[1] - '0';
923: }
924: if (n >= 0 && n < regs->num_regs) {
925: if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
926: smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
927: }
928: p += 2;
929: i += 2;
930: } else {
931: smart_str_appendl(pbuf, p, fwd);
932: p += fwd;
933: i += fwd;
1.1 misho 934: }
935: }
936: }
1.1.1.2 misho 937:
1.1 misho 938: if (eval) {
939: zval v;
940: /* null terminate buffer */
941: smart_str_0(&eval_buf);
942: /* do eval */
943: if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
944: efree(description);
945: php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
946: /* zend_error() does not return in this case */
947: }
948:
949: /* result of eval */
950: convert_to_string(&v);
951: smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
952: /* Clean up */
953: eval_buf.len = 0;
954: zval_dtor(&v);
1.1.1.2 misho 955: } else if (is_callable) {
956: zval *retval_ptr;
957: zval **args[1];
958: zval *subpats;
959: int i;
960:
961: MAKE_STD_ZVAL(subpats);
962: array_init(subpats);
963:
964: for (i = 0; i < regs->num_regs; i++) {
965: add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1);
966: }
967:
968: args[0] = &subpats;
969: /* null terminate buffer */
970: smart_str_0(&eval_buf);
971:
972: arg_replace_fci.param_count = 1;
973: arg_replace_fci.params = args;
974: arg_replace_fci.retval_ptr_ptr = &retval_ptr;
975: if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) {
976: convert_to_string_ex(&retval_ptr);
977: smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
978: eval_buf.len = 0;
979: zval_ptr_dtor(&retval_ptr);
980: } else {
981: efree(description);
982: if (!EG(exception)) {
983: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
984: }
985: }
986: zval_ptr_dtor(&subpats);
1.1 misho 987: }
1.1.1.2 misho 988:
1.1 misho 989: n = regs->end[0];
990: if ((pos - (OnigUChar *)string) < n) {
991: pos = (OnigUChar *)string + n;
992: } else {
993: if (pos < string_lim) {
994: smart_str_appendl(&out_buf, pos, 1);
995: }
996: pos++;
997: }
998: } else { /* nomatch */
999: /* stick that last bit of string on our output */
1000: if (string_lim - pos > 0) {
1001: smart_str_appendl(&out_buf, pos, string_lim - pos);
1002: }
1003: }
1004: onig_region_free(regs, 0);
1005: }
1006:
1007: if (description) {
1008: efree(description);
1009: }
1010: if (regs != NULL) {
1011: onig_region_free(regs, 1);
1012: }
1013: smart_str_free(&eval_buf);
1014:
1015: if (err <= -2) {
1016: smart_str_free(&out_buf);
1017: RETVAL_FALSE;
1018: } else {
1019: smart_str_appendc(&out_buf, '\0');
1020: RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
1021: }
1022: }
1023: /* }}} */
1024:
1025: /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1026: Replace regular expression for multibyte string */
1027: PHP_FUNCTION(mb_ereg_replace)
1028: {
1.1.1.2 misho 1029: _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1.1 misho 1030: }
1031: /* }}} */
1032:
1033: /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string [, string option])
1034: Case insensitive replace regular expression for multibyte string */
1035: PHP_FUNCTION(mb_eregi_replace)
1036: {
1.1.1.2 misho 1037: _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1038: }
1039: /* }}} */
1040:
1041: /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1042: regular expression for multibyte string using replacement callback */
1043: PHP_FUNCTION(mb_ereg_replace_callback)
1044: {
1045: _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1.1 misho 1046: }
1047: /* }}} */
1048:
1049: /* {{{ proto array mb_split(string pattern, string string [, int limit])
1050: split multibyte string into array by regular expression */
1051: PHP_FUNCTION(mb_split)
1052: {
1053: char *arg_pattern;
1054: int arg_pattern_len;
1055: php_mb_regex_t *re;
1056: OnigRegion *regs = NULL;
1057: char *string;
1.1.1.3 misho 1058: OnigUChar *pos, *chunk_pos;
1.1 misho 1059: int string_len;
1060:
1061: int n, err;
1062: long count = -1;
1063:
1064: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1065: RETURN_FALSE;
1066: }
1067:
1.1.1.3 misho 1068: if (count > 0) {
1069: count--;
1.1 misho 1070: }
1071:
1072: /* create regex pattern buffer */
1073: if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1074: RETURN_FALSE;
1075: }
1076:
1077: array_init(return_value);
1078:
1.1.1.3 misho 1079: chunk_pos = pos = (OnigUChar *)string;
1.1 misho 1080: err = 0;
1081: regs = onig_region_new();
1082: /* churn through str, generating array entries as we go */
1.1.1.3 misho 1083: while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
1084: int beg, end;
1085: err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1086: if (err < 0) {
1.1 misho 1087: break;
1088: }
1.1.1.3 misho 1089: beg = regs->beg[0], end = regs->end[0];
1.1 misho 1090: /* add it to the array */
1.1.1.3 misho 1091: if ((pos - (OnigUChar *)string) < end) {
1092: if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1093: add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
1094: --count;
1095: } else {
1096: err = -2;
1097: break;
1098: }
1099: /* point at our new starting point */
1100: chunk_pos = pos = (OnigUChar *)string + end;
1.1 misho 1101: } else {
1.1.1.3 misho 1102: pos++;
1.1 misho 1103: }
1104: onig_region_free(regs, 0);
1105: }
1106:
1107: onig_region_free(regs, 1);
1108:
1109: /* see if we encountered an error */
1110: if (err <= -2) {
1111: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1112: onig_error_code_to_str(err_str, err);
1113: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1114: zval_dtor(return_value);
1115: RETURN_FALSE;
1116: }
1117:
1118: /* otherwise we just have one last element to add to the array */
1.1.1.3 misho 1119: n = ((OnigUChar *)(string + string_len) - chunk_pos);
1.1 misho 1120: if (n > 0) {
1.1.1.3 misho 1121: add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
1.1 misho 1122: } else {
1123: add_next_index_stringl(return_value, "", 0, 1);
1124: }
1125: }
1126: /* }}} */
1127:
1128: /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1129: Regular expression match for multibyte string */
1130: PHP_FUNCTION(mb_ereg_match)
1131: {
1132: char *arg_pattern;
1133: int arg_pattern_len;
1134:
1135: char *string;
1136: int string_len;
1137:
1138: php_mb_regex_t *re;
1139: OnigSyntaxType *syntax;
1140: OnigOptionType option = 0;
1141: int err;
1142:
1143: {
1144: char *option_str = NULL;
1145: int option_str_len = 0;
1146:
1147: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
1148: &arg_pattern, &arg_pattern_len, &string, &string_len,
1149: &option_str, &option_str_len)==FAILURE) {
1150: RETURN_FALSE;
1151: }
1152:
1153: if (option_str != NULL) {
1154: _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1155: } else {
1156: option |= MBREX(regex_default_options);
1157: syntax = MBREX(regex_default_syntax);
1158: }
1159: }
1160:
1161: if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1162: RETURN_FALSE;
1163: }
1164:
1165: /* match */
1166: err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1167: if (err >= 0) {
1168: RETVAL_TRUE;
1169: } else {
1170: RETVAL_FALSE;
1171: }
1172: }
1173: /* }}} */
1174:
1175: /* regex search */
1176: /* {{{ _php_mb_regex_ereg_search_exec */
1177: static void
1178: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1179: {
1180: size_t argc = ZEND_NUM_ARGS();
1181: char *arg_pattern, *arg_options;
1182: int arg_pattern_len, arg_options_len;
1183: int n, i, err, pos, len, beg, end;
1184: OnigOptionType option;
1185: OnigUChar *str;
1186: OnigSyntaxType *syntax;
1187:
1188: if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1189: return;
1190: }
1191:
1192: option = MBREX(regex_default_options);
1193:
1194: if (argc == 2) {
1195: option = 0;
1196: _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1197: }
1198:
1199: if (argc > 0) {
1200: /* create regex pattern buffer */
1201: if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1202: RETURN_FALSE;
1203: }
1204: }
1205:
1206: pos = MBREX(search_pos);
1207: str = NULL;
1208: len = 0;
1209: if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
1210: str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1211: len = Z_STRLEN_P(MBREX(search_str));
1212: }
1213:
1214: if (MBREX(search_re) == NULL) {
1215: php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
1216: RETURN_FALSE;
1217: }
1218:
1219: if (str == NULL) {
1220: php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
1221: RETURN_FALSE;
1222: }
1223:
1224: if (MBREX(search_regs)) {
1225: onig_region_free(MBREX(search_regs), 1);
1226: }
1227: MBREX(search_regs) = onig_region_new();
1228:
1229: err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
1230: if (err == ONIG_MISMATCH) {
1231: MBREX(search_pos) = len;
1232: RETVAL_FALSE;
1233: } else if (err <= -2) {
1234: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1235: onig_error_code_to_str(err_str, err);
1236: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1237: RETVAL_FALSE;
1238: } else {
1239: if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1240: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1241: }
1242: switch (mode) {
1243: case 1:
1244: array_init(return_value);
1245: beg = MBREX(search_regs)->beg[0];
1246: end = MBREX(search_regs)->end[0];
1247: add_next_index_long(return_value, beg);
1248: add_next_index_long(return_value, end - beg);
1249: break;
1250: case 2:
1251: array_init(return_value);
1252: n = MBREX(search_regs)->num_regs;
1253: for (i = 0; i < n; i++) {
1254: beg = MBREX(search_regs)->beg[i];
1255: end = MBREX(search_regs)->end[i];
1256: if (beg >= 0 && beg <= end && end <= len) {
1257: add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1258: } else {
1259: add_index_bool(return_value, i, 0);
1260: }
1261: }
1262: break;
1263: default:
1264: RETVAL_TRUE;
1265: break;
1266: }
1267: end = MBREX(search_regs)->end[0];
1268: if (pos < end) {
1269: MBREX(search_pos) = end;
1270: } else {
1271: MBREX(search_pos) = pos + 1;
1272: }
1273: }
1274:
1275: if (err < 0) {
1276: onig_region_free(MBREX(search_regs), 1);
1277: MBREX(search_regs) = (OnigRegion *)NULL;
1278: }
1279: }
1280: /* }}} */
1281:
1282: /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1283: Regular expression search for multibyte string */
1284: PHP_FUNCTION(mb_ereg_search)
1285: {
1286: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1287: }
1288: /* }}} */
1289:
1290: /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1291: Regular expression search for multibyte string */
1292: PHP_FUNCTION(mb_ereg_search_pos)
1293: {
1294: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1295: }
1296: /* }}} */
1297:
1298: /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1299: Regular expression search for multibyte string */
1300: PHP_FUNCTION(mb_ereg_search_regs)
1301: {
1302: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1303: }
1304: /* }}} */
1305:
1306: /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1307: Initialize string and regular expression for search. */
1308: PHP_FUNCTION(mb_ereg_search_init)
1309: {
1310: size_t argc = ZEND_NUM_ARGS();
1311: zval *arg_str;
1312: char *arg_pattern = NULL, *arg_options = NULL;
1313: int arg_pattern_len = 0, arg_options_len = 0;
1314: OnigSyntaxType *syntax = NULL;
1315: OnigOptionType option;
1316:
1317: if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1318: return;
1319: }
1320:
1321: if (argc > 1 && arg_pattern_len == 0) {
1322: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
1323: RETURN_FALSE;
1324: }
1325:
1326: option = MBREX(regex_default_options);
1327: syntax = MBREX(regex_default_syntax);
1328:
1329: if (argc == 3) {
1330: option = 0;
1331: _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1332: }
1333:
1334: if (argc > 1) {
1335: /* create regex pattern buffer */
1336: if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1337: RETURN_FALSE;
1338: }
1339: }
1340:
1341: if (MBREX(search_str) != NULL) {
1342: zval_ptr_dtor(&MBREX(search_str));
1343: MBREX(search_str) = (zval *)NULL;
1344: }
1345:
1346: MBREX(search_str) = arg_str;
1347: Z_ADDREF_P(MBREX(search_str));
1348: SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
1349:
1350: MBREX(search_pos) = 0;
1351:
1352: if (MBREX(search_regs) != NULL) {
1353: onig_region_free(MBREX(search_regs), 1);
1354: MBREX(search_regs) = (OnigRegion *) NULL;
1355: }
1356:
1357: RETURN_TRUE;
1358: }
1359: /* }}} */
1360:
1361: /* {{{ proto array mb_ereg_search_getregs(void)
1362: Get matched substring of the last time */
1363: PHP_FUNCTION(mb_ereg_search_getregs)
1364: {
1365: int n, i, len, beg, end;
1366: OnigUChar *str;
1367:
1368: if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
1369: array_init(return_value);
1370:
1371: str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1372: len = Z_STRLEN_P(MBREX(search_str));
1373: n = MBREX(search_regs)->num_regs;
1374: for (i = 0; i < n; i++) {
1375: beg = MBREX(search_regs)->beg[i];
1376: end = MBREX(search_regs)->end[i];
1377: if (beg >= 0 && beg <= end && end <= len) {
1378: add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1379: } else {
1380: add_index_bool(return_value, i, 0);
1381: }
1382: }
1383: } else {
1384: RETVAL_FALSE;
1385: }
1386: }
1387: /* }}} */
1388:
1389: /* {{{ proto int mb_ereg_search_getpos(void)
1390: Get search start position */
1391: PHP_FUNCTION(mb_ereg_search_getpos)
1392: {
1393: RETVAL_LONG(MBREX(search_pos));
1394: }
1395: /* }}} */
1396:
1397: /* {{{ proto bool mb_ereg_search_setpos(int position)
1398: Set search start position */
1399: PHP_FUNCTION(mb_ereg_search_setpos)
1400: {
1401: long position;
1402:
1403: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
1404: return;
1405: }
1406:
1407: if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
1408: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
1409: MBREX(search_pos) = 0;
1410: RETURN_FALSE;
1411: }
1412:
1413: MBREX(search_pos) = position;
1414: RETURN_TRUE;
1415: }
1416: /* }}} */
1417:
1418: /* {{{ php_mb_regex_set_options */
1419: static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
1420: {
1421: if (prev_options != NULL) {
1422: *prev_options = MBREX(regex_default_options);
1423: }
1424: if (prev_syntax != NULL) {
1425: *prev_syntax = MBREX(regex_default_syntax);
1426: }
1427: MBREX(regex_default_options) = options;
1428: MBREX(regex_default_syntax) = syntax;
1429: }
1430: /* }}} */
1431:
1432: /* {{{ proto string mb_regex_set_options([string options])
1433: Set or get the default options for mbregex functions */
1434: PHP_FUNCTION(mb_regex_set_options)
1435: {
1436: OnigOptionType opt;
1437: OnigSyntaxType *syntax;
1438: char *string = NULL;
1439: int string_len;
1440: char buf[16];
1441:
1442: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
1443: &string, &string_len) == FAILURE) {
1444: RETURN_FALSE;
1445: }
1446: if (string != NULL) {
1447: opt = 0;
1448: syntax = NULL;
1449: _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1450: _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
1451: } else {
1452: opt = MBREX(regex_default_options);
1453: syntax = MBREX(regex_default_syntax);
1454: }
1455: _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1456:
1457: RETVAL_STRING(buf, 1);
1458: }
1459: /* }}} */
1460:
1461: #endif /* HAVE_MBREGEX */
1462:
1463: /*
1464: * Local variables:
1465: * tab-width: 4
1466: * c-basic-offset: 4
1467: * End:
1468: * vim600: fdm=marker
1469: * vim: noet sw=4 ts=4
1470: */