Annotation of embedaddon/php/ext/mbstring/php_mbregex.c, revision 1.1
1.1 ! misho 1: /*
! 2: +----------------------------------------------------------------------+
! 3: | PHP Version 5 |
! 4: +----------------------------------------------------------------------+
! 5: | Copyright (c) 1997-2012 The PHP Group |
! 6: +----------------------------------------------------------------------+
! 7: | This source file is subject to version 3.01 of the PHP license, |
! 8: | that is bundled with this package in the file LICENSE, and is |
! 9: | available through the world-wide-web at the following url: |
! 10: | http://www.php.net/license/3_01.txt |
! 11: | If you did not receive a copy of the PHP license and are unable to |
! 12: | obtain it through the world-wide-web, please send a note to |
! 13: | license@php.net so we can mail you a copy immediately. |
! 14: +----------------------------------------------------------------------+
! 15: | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
! 16: +----------------------------------------------------------------------+
! 17: */
! 18:
! 19: /* $Id: php_mbregex.c 321634 2012-01-01 13:15:04Z felipe $ */
! 20:
! 21:
! 22: #ifdef HAVE_CONFIG_H
! 23: #include "config.h"
! 24: #endif
! 25:
! 26: #include "php.h"
! 27: #include "php_ini.h"
! 28:
! 29: #if HAVE_MBREGEX
! 30:
! 31: #include "ext/standard/php_smart_str.h"
! 32: #include "ext/standard/info.h"
! 33: #include "php_mbregex.h"
! 34: #include "mbstring.h"
! 35:
! 36: #include "php_onig_compat.h" /* must come prior to the oniguruma header */
! 37: #include <oniguruma.h>
! 38: #undef UChar
! 39:
! 40: ZEND_EXTERN_MODULE_GLOBALS(mbstring)
! 41:
! 42: struct _zend_mb_regex_globals {
! 43: OnigEncoding default_mbctype;
! 44: OnigEncoding current_mbctype;
! 45: HashTable ht_rc;
! 46: zval *search_str;
! 47: zval *search_str_val;
! 48: unsigned int search_pos;
! 49: php_mb_regex_t *search_re;
! 50: OnigRegion *search_regs;
! 51: OnigOptionType regex_default_options;
! 52: OnigSyntaxType *regex_default_syntax;
! 53: };
! 54:
! 55: #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
! 56:
! 57: /* {{{ static void php_mb_regex_free_cache() */
! 58: static void php_mb_regex_free_cache(php_mb_regex_t **pre)
! 59: {
! 60: onig_free(*pre);
! 61: }
! 62: /* }}} */
! 63:
! 64: /* {{{ _php_mb_regex_globals_ctor */
! 65: static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
! 66: {
! 67: pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
! 68: pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
! 69: zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
! 70: pglobals->search_str = (zval*) NULL;
! 71: pglobals->search_re = (php_mb_regex_t*)NULL;
! 72: pglobals->search_pos = 0;
! 73: pglobals->search_regs = (OnigRegion*)NULL;
! 74: pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
! 75: pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
! 76: return SUCCESS;
! 77: }
! 78: /* }}} */
! 79:
! 80: /* {{{ _php_mb_regex_globals_dtor */
! 81: static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
! 82: {
! 83: zend_hash_destroy(&pglobals->ht_rc);
! 84: }
! 85: /* }}} */
! 86:
! 87: /* {{{ php_mb_regex_globals_alloc */
! 88: zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
! 89: {
! 90: zend_mb_regex_globals *pglobals = pemalloc(
! 91: sizeof(zend_mb_regex_globals), 1);
! 92: if (!pglobals) {
! 93: return NULL;
! 94: }
! 95: if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
! 96: pefree(pglobals, 1);
! 97: return NULL;
! 98: }
! 99: return pglobals;
! 100: }
! 101: /* }}} */
! 102:
! 103: /* {{{ php_mb_regex_globals_free */
! 104: void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
! 105: {
! 106: if (!pglobals) {
! 107: return;
! 108: }
! 109: _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
! 110: pefree(pglobals, 1);
! 111: }
! 112: /* }}} */
! 113:
! 114: /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
! 115: PHP_MINIT_FUNCTION(mb_regex)
! 116: {
! 117: onig_init();
! 118: return SUCCESS;
! 119: }
! 120: /* }}} */
! 121:
! 122: /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
! 123: PHP_MSHUTDOWN_FUNCTION(mb_regex)
! 124: {
! 125: onig_end();
! 126: return SUCCESS;
! 127: }
! 128: /* }}} */
! 129:
! 130: /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
! 131: PHP_RINIT_FUNCTION(mb_regex)
! 132: {
! 133: return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
! 134: }
! 135: /* }}} */
! 136:
! 137: /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
! 138: PHP_RSHUTDOWN_FUNCTION(mb_regex)
! 139: {
! 140: MBREX(current_mbctype) = MBREX(default_mbctype);
! 141:
! 142: if (MBREX(search_str) != NULL) {
! 143: zval_ptr_dtor(&MBREX(search_str));
! 144: MBREX(search_str) = (zval *)NULL;
! 145: }
! 146: MBREX(search_pos) = 0;
! 147:
! 148: if (MBREX(search_regs) != NULL) {
! 149: onig_region_free(MBREX(search_regs), 1);
! 150: MBREX(search_regs) = (OnigRegion *)NULL;
! 151: }
! 152: zend_hash_clean(&MBREX(ht_rc));
! 153:
! 154: return SUCCESS;
! 155: }
! 156: /* }}} */
! 157:
! 158: /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
! 159: PHP_MINFO_FUNCTION(mb_regex)
! 160: {
! 161: char buf[32];
! 162: php_info_print_table_start();
! 163: php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
! 164: snprintf(buf, sizeof(buf), "%d.%d.%d",
! 165: ONIGURUMA_VERSION_MAJOR,
! 166: ONIGURUMA_VERSION_MINOR,
! 167: ONIGURUMA_VERSION_TEENY);
! 168: #ifdef PHP_ONIG_BUNDLED
! 169: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 170: php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
! 171: #else /* USE_COMBINATION_EXPLOSION_CHECK */
! 172: php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
! 173: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
! 174: #endif /* PHP_BUNDLED_ONIG */
! 175: php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
! 176: php_info_print_table_end();
! 177: }
! 178: /* }}} */
! 179:
! 180: /*
! 181: * encoding name resolver
! 182: */
! 183:
! 184: /* {{{ encoding name map */
! 185: typedef struct _php_mb_regex_enc_name_map_t {
! 186: const char *names;
! 187: OnigEncoding code;
! 188: } php_mb_regex_enc_name_map_t;
! 189:
! 190: php_mb_regex_enc_name_map_t enc_name_map[] = {
! 191: #ifdef ONIG_ENCODING_EUC_JP
! 192: {
! 193: "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
! 194: ONIG_ENCODING_EUC_JP
! 195: },
! 196: #endif
! 197: #ifdef ONIG_ENCODING_UTF8
! 198: {
! 199: "UTF-8\0UTF8\0",
! 200: ONIG_ENCODING_UTF8
! 201: },
! 202: #endif
! 203: #ifdef ONIG_ENCODING_UTF16_BE
! 204: {
! 205: "UTF-16\0UTF-16BE\0",
! 206: ONIG_ENCODING_UTF16_BE
! 207: },
! 208: #endif
! 209: #ifdef ONIG_ENCODING_UTF16_LE
! 210: {
! 211: "UTF-16LE\0",
! 212: ONIG_ENCODING_UTF16_LE
! 213: },
! 214: #endif
! 215: #ifdef ONIG_ENCODING_UTF32_BE
! 216: {
! 217: "UCS-4\0UTF-32\0UTF-32BE\0",
! 218: ONIG_ENCODING_UTF32_BE
! 219: },
! 220: #endif
! 221: #ifdef ONIG_ENCODING_UTF32_LE
! 222: {
! 223: "UCS-4LE\0UTF-32LE\0",
! 224: ONIG_ENCODING_UTF32_LE
! 225: },
! 226: #endif
! 227: #ifdef ONIG_ENCODING_SJIS
! 228: {
! 229: "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
! 230: ONIG_ENCODING_SJIS
! 231: },
! 232: #endif
! 233: #ifdef ONIG_ENCODING_BIG5
! 234: {
! 235: "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
! 236: ONIG_ENCODING_BIG5
! 237: },
! 238: #endif
! 239: #ifdef ONIG_ENCODING_EUC_CN
! 240: {
! 241: "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
! 242: ONIG_ENCODING_EUC_CN
! 243: },
! 244: #endif
! 245: #ifdef ONIG_ENCODING_EUC_TW
! 246: {
! 247: "EUC-TW\0EUCTW\0EUC_TW\0",
! 248: ONIG_ENCODING_EUC_TW
! 249: },
! 250: #endif
! 251: #ifdef ONIG_ENCODING_EUC_KR
! 252: {
! 253: "EUC-KR\0EUCKR\0EUC_KR\0",
! 254: ONIG_ENCODING_EUC_KR
! 255: },
! 256: #endif
! 257: #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
! 258: {
! 259: "KOI8\0KOI-8\0",
! 260: ONIG_ENCODING_KOI8
! 261: },
! 262: #endif
! 263: #ifdef ONIG_ENCODING_KOI8_R
! 264: {
! 265: "KOI8R\0KOI8-R\0KOI-8R\0",
! 266: ONIG_ENCODING_KOI8_R
! 267: },
! 268: #endif
! 269: #ifdef ONIG_ENCODING_ISO_8859_1
! 270: {
! 271: "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
! 272: ONIG_ENCODING_ISO_8859_1
! 273: },
! 274: #endif
! 275: #ifdef ONIG_ENCODING_ISO_8859_2
! 276: {
! 277: "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
! 278: ONIG_ENCODING_ISO_8859_2
! 279: },
! 280: #endif
! 281: #ifdef ONIG_ENCODING_ISO_8859_3
! 282: {
! 283: "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
! 284: ONIG_ENCODING_ISO_8859_3
! 285: },
! 286: #endif
! 287: #ifdef ONIG_ENCODING_ISO_8859_4
! 288: {
! 289: "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
! 290: ONIG_ENCODING_ISO_8859_4
! 291: },
! 292: #endif
! 293: #ifdef ONIG_ENCODING_ISO_8859_5
! 294: {
! 295: "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
! 296: ONIG_ENCODING_ISO_8859_5
! 297: },
! 298: #endif
! 299: #ifdef ONIG_ENCODING_ISO_8859_6
! 300: {
! 301: "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
! 302: ONIG_ENCODING_ISO_8859_6
! 303: },
! 304: #endif
! 305: #ifdef ONIG_ENCODING_ISO_8859_7
! 306: {
! 307: "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
! 308: ONIG_ENCODING_ISO_8859_7
! 309: },
! 310: #endif
! 311: #ifdef ONIG_ENCODING_ISO_8859_8
! 312: {
! 313: "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
! 314: ONIG_ENCODING_ISO_8859_8
! 315: },
! 316: #endif
! 317: #ifdef ONIG_ENCODING_ISO_8859_9
! 318: {
! 319: "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
! 320: ONIG_ENCODING_ISO_8859_9
! 321: },
! 322: #endif
! 323: #ifdef ONIG_ENCODING_ISO_8859_10
! 324: {
! 325: "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
! 326: ONIG_ENCODING_ISO_8859_10
! 327: },
! 328: #endif
! 329: #ifdef ONIG_ENCODING_ISO_8859_11
! 330: {
! 331: "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
! 332: ONIG_ENCODING_ISO_8859_11
! 333: },
! 334: #endif
! 335: #ifdef ONIG_ENCODING_ISO_8859_13
! 336: {
! 337: "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
! 338: ONIG_ENCODING_ISO_8859_13
! 339: },
! 340: #endif
! 341: #ifdef ONIG_ENCODING_ISO_8859_14
! 342: {
! 343: "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
! 344: ONIG_ENCODING_ISO_8859_14
! 345: },
! 346: #endif
! 347: #ifdef ONIG_ENCODING_ISO_8859_15
! 348: {
! 349: "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
! 350: ONIG_ENCODING_ISO_8859_15
! 351: },
! 352: #endif
! 353: #ifdef ONIG_ENCODING_ISO_8859_16
! 354: {
! 355: "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
! 356: ONIG_ENCODING_ISO_8859_16
! 357: },
! 358: #endif
! 359: #ifdef ONIG_ENCODING_ASCII
! 360: {
! 361: "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
! 362: ONIG_ENCODING_ASCII
! 363: },
! 364: #endif
! 365: { NULL, ONIG_ENCODING_UNDEF }
! 366: };
! 367: /* }}} */
! 368:
! 369: /* {{{ php_mb_regex_name2mbctype */
! 370: static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
! 371: {
! 372: const char *p;
! 373: php_mb_regex_enc_name_map_t *mapping;
! 374:
! 375: if (pname == NULL) {
! 376: return ONIG_ENCODING_UNDEF;
! 377: }
! 378:
! 379: for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
! 380: for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
! 381: if (strcasecmp(p, pname) == 0) {
! 382: return mapping->code;
! 383: }
! 384: }
! 385: }
! 386:
! 387: return ONIG_ENCODING_UNDEF;
! 388: }
! 389: /* }}} */
! 390:
! 391: /* {{{ php_mb_regex_mbctype2name */
! 392: static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
! 393: {
! 394: php_mb_regex_enc_name_map_t *mapping;
! 395:
! 396: for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
! 397: if (mapping->code == mbctype) {
! 398: return mapping->names;
! 399: }
! 400: }
! 401:
! 402: return NULL;
! 403: }
! 404: /* }}} */
! 405:
! 406: /* {{{ php_mb_regex_set_mbctype */
! 407: int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
! 408: {
! 409: OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
! 410: if (mbctype == ONIG_ENCODING_UNDEF) {
! 411: return FAILURE;
! 412: }
! 413: MBREX(current_mbctype) = mbctype;
! 414: return SUCCESS;
! 415: }
! 416: /* }}} */
! 417:
! 418: /* {{{ php_mb_regex_set_default_mbctype */
! 419: int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
! 420: {
! 421: OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
! 422: if (mbctype == ONIG_ENCODING_UNDEF) {
! 423: return FAILURE;
! 424: }
! 425: MBREX(default_mbctype) = mbctype;
! 426: return SUCCESS;
! 427: }
! 428: /* }}} */
! 429:
! 430: /* {{{ php_mb_regex_get_mbctype */
! 431: const char *php_mb_regex_get_mbctype(TSRMLS_D)
! 432: {
! 433: return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
! 434: }
! 435: /* }}} */
! 436:
! 437: /* {{{ php_mb_regex_get_default_mbctype */
! 438: const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
! 439: {
! 440: return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
! 441: }
! 442: /* }}} */
! 443:
! 444: /*
! 445: * regex cache
! 446: */
! 447: /* {{{ php_mbregex_compile_pattern */
! 448: static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
! 449: {
! 450: int err_code = 0;
! 451: int found = 0;
! 452: php_mb_regex_t *retval = NULL, **rc = NULL;
! 453: OnigErrorInfo err_info;
! 454: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
! 455:
! 456: found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
! 457: if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
! 458: if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
! 459: onig_error_code_to_str(err_str, err_code, err_info);
! 460: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
! 461: retval = NULL;
! 462: goto out;
! 463: }
! 464: zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
! 465: } else if (found == SUCCESS) {
! 466: retval = *rc;
! 467: }
! 468: out:
! 469: return retval;
! 470: }
! 471: /* }}} */
! 472:
! 473: /* {{{ _php_mb_regex_get_option_string */
! 474: static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
! 475: {
! 476: size_t len_left = len;
! 477: size_t len_req = 0;
! 478: char *p = str;
! 479: char c;
! 480:
! 481: if ((option & ONIG_OPTION_IGNORECASE) != 0) {
! 482: if (len_left > 0) {
! 483: --len_left;
! 484: *(p++) = 'i';
! 485: }
! 486: ++len_req;
! 487: }
! 488:
! 489: if ((option & ONIG_OPTION_EXTEND) != 0) {
! 490: if (len_left > 0) {
! 491: --len_left;
! 492: *(p++) = 'x';
! 493: }
! 494: ++len_req;
! 495: }
! 496:
! 497: if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
! 498: (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
! 499: if (len_left > 0) {
! 500: --len_left;
! 501: *(p++) = 'p';
! 502: }
! 503: ++len_req;
! 504: } else {
! 505: if ((option & ONIG_OPTION_MULTILINE) != 0) {
! 506: if (len_left > 0) {
! 507: --len_left;
! 508: *(p++) = 'm';
! 509: }
! 510: ++len_req;
! 511: }
! 512:
! 513: if ((option & ONIG_OPTION_SINGLELINE) != 0) {
! 514: if (len_left > 0) {
! 515: --len_left;
! 516: *(p++) = 's';
! 517: }
! 518: ++len_req;
! 519: }
! 520: }
! 521: if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
! 522: if (len_left > 0) {
! 523: --len_left;
! 524: *(p++) = 'l';
! 525: }
! 526: ++len_req;
! 527: }
! 528: if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
! 529: if (len_left > 0) {
! 530: --len_left;
! 531: *(p++) = 'n';
! 532: }
! 533: ++len_req;
! 534: }
! 535:
! 536: c = 0;
! 537:
! 538: if (syntax == ONIG_SYNTAX_JAVA) {
! 539: c = 'j';
! 540: } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
! 541: c = 'u';
! 542: } else if (syntax == ONIG_SYNTAX_GREP) {
! 543: c = 'g';
! 544: } else if (syntax == ONIG_SYNTAX_EMACS) {
! 545: c = 'c';
! 546: } else if (syntax == ONIG_SYNTAX_RUBY) {
! 547: c = 'r';
! 548: } else if (syntax == ONIG_SYNTAX_PERL) {
! 549: c = 'z';
! 550: } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
! 551: c = 'b';
! 552: } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
! 553: c = 'd';
! 554: }
! 555:
! 556: if (c != 0) {
! 557: if (len_left > 0) {
! 558: --len_left;
! 559: *(p++) = c;
! 560: }
! 561: ++len_req;
! 562: }
! 563:
! 564:
! 565: if (len_left > 0) {
! 566: --len_left;
! 567: *(p++) = '\0';
! 568: }
! 569: ++len_req;
! 570: if (len < len_req) {
! 571: return len_req;
! 572: }
! 573:
! 574: return 0;
! 575: }
! 576: /* }}} */
! 577:
! 578: /* {{{ _php_mb_regex_init_options */
! 579: static void
! 580: _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
! 581: {
! 582: int n;
! 583: char c;
! 584: int optm = 0;
! 585:
! 586: *syntax = ONIG_SYNTAX_RUBY;
! 587:
! 588: if (parg != NULL) {
! 589: n = 0;
! 590: while(n < narg) {
! 591: c = parg[n++];
! 592: switch (c) {
! 593: case 'i':
! 594: optm |= ONIG_OPTION_IGNORECASE;
! 595: break;
! 596: case 'x':
! 597: optm |= ONIG_OPTION_EXTEND;
! 598: break;
! 599: case 'm':
! 600: optm |= ONIG_OPTION_MULTILINE;
! 601: break;
! 602: case 's':
! 603: optm |= ONIG_OPTION_SINGLELINE;
! 604: break;
! 605: case 'p':
! 606: optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
! 607: break;
! 608: case 'l':
! 609: optm |= ONIG_OPTION_FIND_LONGEST;
! 610: break;
! 611: case 'n':
! 612: optm |= ONIG_OPTION_FIND_NOT_EMPTY;
! 613: break;
! 614: case 'j':
! 615: *syntax = ONIG_SYNTAX_JAVA;
! 616: break;
! 617: case 'u':
! 618: *syntax = ONIG_SYNTAX_GNU_REGEX;
! 619: break;
! 620: case 'g':
! 621: *syntax = ONIG_SYNTAX_GREP;
! 622: break;
! 623: case 'c':
! 624: *syntax = ONIG_SYNTAX_EMACS;
! 625: break;
! 626: case 'r':
! 627: *syntax = ONIG_SYNTAX_RUBY;
! 628: break;
! 629: case 'z':
! 630: *syntax = ONIG_SYNTAX_PERL;
! 631: break;
! 632: case 'b':
! 633: *syntax = ONIG_SYNTAX_POSIX_BASIC;
! 634: break;
! 635: case 'd':
! 636: *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
! 637: break;
! 638: case 'e':
! 639: if (eval != NULL) *eval = 1;
! 640: break;
! 641: default:
! 642: break;
! 643: }
! 644: }
! 645: if (option != NULL) *option|=optm;
! 646: }
! 647: }
! 648: /* }}} */
! 649:
! 650: /*
! 651: * php functions
! 652: */
! 653:
! 654: /* {{{ proto string mb_regex_encoding([string encoding])
! 655: Returns the current encoding for regex as a string. */
! 656: PHP_FUNCTION(mb_regex_encoding)
! 657: {
! 658: size_t argc = ZEND_NUM_ARGS();
! 659: char *encoding;
! 660: int encoding_len;
! 661: OnigEncoding mbctype;
! 662:
! 663: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
! 664: return;
! 665: }
! 666:
! 667: if (argc == 0) {
! 668: const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
! 669:
! 670: if (retval == NULL) {
! 671: RETURN_FALSE;
! 672: }
! 673:
! 674: RETURN_STRING((char *)retval, 1);
! 675: } else if (argc == 1) {
! 676: mbctype = _php_mb_regex_name2mbctype(encoding);
! 677:
! 678: if (mbctype == ONIG_ENCODING_UNDEF) {
! 679: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
! 680: RETURN_FALSE;
! 681: }
! 682:
! 683: MBREX(current_mbctype) = mbctype;
! 684: RETURN_TRUE;
! 685: }
! 686: }
! 687: /* }}} */
! 688:
! 689: /* {{{ _php_mb_regex_ereg_exec */
! 690: static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
! 691: {
! 692: zval **arg_pattern, *array;
! 693: char *string;
! 694: int string_len;
! 695: php_mb_regex_t *re;
! 696: OnigRegion *regs = NULL;
! 697: int i, match_len, beg, end;
! 698: OnigOptionType options;
! 699: char *str;
! 700:
! 701: array = NULL;
! 702:
! 703: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
! 704: RETURN_FALSE;
! 705: }
! 706:
! 707: options = MBREX(regex_default_options);
! 708: if (icase) {
! 709: options |= ONIG_OPTION_IGNORECASE;
! 710: }
! 711:
! 712: /* compile the regular expression from the supplied regex */
! 713: if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
! 714: /* we convert numbers to integers and treat them as a string */
! 715: if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
! 716: convert_to_long_ex(arg_pattern); /* get rid of decimal places */
! 717: }
! 718: convert_to_string_ex(arg_pattern);
! 719: /* don't bother doing an extended regex with just a number */
! 720: }
! 721:
! 722: if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
! 723: php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
! 724: RETVAL_FALSE;
! 725: goto out;
! 726: }
! 727:
! 728: re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
! 729: if (re == NULL) {
! 730: RETVAL_FALSE;
! 731: goto out;
! 732: }
! 733:
! 734: regs = onig_region_new();
! 735:
! 736: /* actually execute the regular expression */
! 737: if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
! 738: RETVAL_FALSE;
! 739: goto out;
! 740: }
! 741:
! 742: match_len = 1;
! 743: str = string;
! 744: if (array != NULL) {
! 745: match_len = regs->end[0] - regs->beg[0];
! 746: zval_dtor(array);
! 747: array_init(array);
! 748: for (i = 0; i < regs->num_regs; i++) {
! 749: beg = regs->beg[i];
! 750: end = regs->end[i];
! 751: if (beg >= 0 && beg < end && end <= string_len) {
! 752: add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
! 753: } else {
! 754: add_index_bool(array, i, 0);
! 755: }
! 756: }
! 757: }
! 758:
! 759: if (match_len == 0) {
! 760: match_len = 1;
! 761: }
! 762: RETVAL_LONG(match_len);
! 763: out:
! 764: if (regs != NULL) {
! 765: onig_region_free(regs, 1);
! 766: }
! 767: }
! 768: /* }}} */
! 769:
! 770: /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
! 771: Regular expression match for multibyte string */
! 772: PHP_FUNCTION(mb_ereg)
! 773: {
! 774: _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
! 775: }
! 776: /* }}} */
! 777:
! 778: /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
! 779: Case-insensitive regular expression match for multibyte string */
! 780: PHP_FUNCTION(mb_eregi)
! 781: {
! 782: _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
! 783: }
! 784: /* }}} */
! 785:
! 786: /* {{{ _php_mb_regex_ereg_replace_exec */
! 787: static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
! 788: {
! 789: zval **arg_pattern_zval;
! 790:
! 791: char *arg_pattern;
! 792: int arg_pattern_len;
! 793:
! 794: char *replace;
! 795: int replace_len;
! 796:
! 797: char *string;
! 798: int string_len;
! 799:
! 800: char *p;
! 801: php_mb_regex_t *re;
! 802: OnigSyntaxType *syntax;
! 803: OnigRegion *regs = NULL;
! 804: smart_str out_buf = { 0 };
! 805: smart_str eval_buf = { 0 };
! 806: smart_str *pbuf;
! 807: int i, err, eval, n;
! 808: OnigUChar *pos;
! 809: OnigUChar *string_lim;
! 810: char *description = NULL;
! 811: char pat_buf[2];
! 812:
! 813: const mbfl_encoding *enc;
! 814:
! 815: {
! 816: const char *current_enc_name;
! 817: current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
! 818: if (current_enc_name == NULL ||
! 819: (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
! 820: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
! 821: RETURN_FALSE;
! 822: }
! 823: }
! 824: eval = 0;
! 825: {
! 826: char *option_str = NULL;
! 827: int option_str_len = 0;
! 828:
! 829: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
! 830: &arg_pattern_zval,
! 831: &replace, &replace_len,
! 832: &string, &string_len,
! 833: &option_str, &option_str_len) == FAILURE) {
! 834: RETURN_FALSE;
! 835: }
! 836:
! 837: if (option_str != NULL) {
! 838: _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
! 839: } else {
! 840: options |= MBREX(regex_default_options);
! 841: syntax = MBREX(regex_default_syntax);
! 842: }
! 843: }
! 844: if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
! 845: arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
! 846: arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
! 847: } else {
! 848: /* FIXME: this code is not multibyte aware! */
! 849: convert_to_long_ex(arg_pattern_zval);
! 850: pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
! 851: pat_buf[1] = '\0';
! 852:
! 853: arg_pattern = pat_buf;
! 854: arg_pattern_len = 1;
! 855: }
! 856: /* create regex pattern buffer */
! 857: re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
! 858: if (re == NULL) {
! 859: RETURN_FALSE;
! 860: }
! 861:
! 862: if (eval) {
! 863: pbuf = &eval_buf;
! 864: description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
! 865: } else {
! 866: pbuf = &out_buf;
! 867: description = NULL;
! 868: }
! 869:
! 870: /* do the actual work */
! 871: err = 0;
! 872: pos = (OnigUChar *)string;
! 873: string_lim = (OnigUChar*)(string + string_len);
! 874: regs = onig_region_new();
! 875: while (err >= 0) {
! 876: err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
! 877: if (err <= -2) {
! 878: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
! 879: onig_error_code_to_str(err_str, err);
! 880: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
! 881: break;
! 882: }
! 883: if (err >= 0) {
! 884: #if moriyoshi_0
! 885: if (regs->beg[0] == regs->end[0]) {
! 886: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
! 887: break;
! 888: }
! 889: #endif
! 890: /* copy the part of the string before the match */
! 891: smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
! 892: /* copy replacement and backrefs */
! 893: i = 0;
! 894: p = replace;
! 895: while (i < replace_len) {
! 896: int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
! 897: n = -1;
! 898: if ((replace_len - i) >= 2 && fwd == 1 &&
! 899: p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
! 900: n = p[1] - '0';
! 901: }
! 902: if (n >= 0 && n < regs->num_regs) {
! 903: if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
! 904: smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
! 905: }
! 906: p += 2;
! 907: i += 2;
! 908: } else {
! 909: smart_str_appendl(pbuf, p, fwd);
! 910: p += fwd;
! 911: i += fwd;
! 912: }
! 913: }
! 914: if (eval) {
! 915: zval v;
! 916: /* null terminate buffer */
! 917: smart_str_0(&eval_buf);
! 918: /* do eval */
! 919: if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
! 920: efree(description);
! 921: php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
! 922: /* zend_error() does not return in this case */
! 923: }
! 924:
! 925: /* result of eval */
! 926: convert_to_string(&v);
! 927: smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
! 928: /* Clean up */
! 929: eval_buf.len = 0;
! 930: zval_dtor(&v);
! 931: }
! 932: n = regs->end[0];
! 933: if ((pos - (OnigUChar *)string) < n) {
! 934: pos = (OnigUChar *)string + n;
! 935: } else {
! 936: if (pos < string_lim) {
! 937: smart_str_appendl(&out_buf, pos, 1);
! 938: }
! 939: pos++;
! 940: }
! 941: } else { /* nomatch */
! 942: /* stick that last bit of string on our output */
! 943: if (string_lim - pos > 0) {
! 944: smart_str_appendl(&out_buf, pos, string_lim - pos);
! 945: }
! 946: }
! 947: onig_region_free(regs, 0);
! 948: }
! 949:
! 950: if (description) {
! 951: efree(description);
! 952: }
! 953: if (regs != NULL) {
! 954: onig_region_free(regs, 1);
! 955: }
! 956: smart_str_free(&eval_buf);
! 957:
! 958: if (err <= -2) {
! 959: smart_str_free(&out_buf);
! 960: RETVAL_FALSE;
! 961: } else {
! 962: smart_str_appendc(&out_buf, '\0');
! 963: RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
! 964: }
! 965: }
! 966: /* }}} */
! 967:
! 968: /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
! 969: Replace regular expression for multibyte string */
! 970: PHP_FUNCTION(mb_ereg_replace)
! 971: {
! 972: _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
! 973: }
! 974: /* }}} */
! 975:
! 976: /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
! 977: Case insensitive replace regular expression for multibyte string */
! 978: PHP_FUNCTION(mb_eregi_replace)
! 979: {
! 980: _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
! 981: }
! 982: /* }}} */
! 983:
! 984: /* {{{ proto array mb_split(string pattern, string string [, int limit])
! 985: split multibyte string into array by regular expression */
! 986: PHP_FUNCTION(mb_split)
! 987: {
! 988: char *arg_pattern;
! 989: int arg_pattern_len;
! 990: php_mb_regex_t *re;
! 991: OnigRegion *regs = NULL;
! 992: char *string;
! 993: OnigUChar *pos;
! 994: int string_len;
! 995:
! 996: int n, err;
! 997: long count = -1;
! 998:
! 999: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
! 1000: RETURN_FALSE;
! 1001: }
! 1002:
! 1003: if (count == 0) {
! 1004: count = 1;
! 1005: }
! 1006:
! 1007: /* create regex pattern buffer */
! 1008: if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
! 1009: RETURN_FALSE;
! 1010: }
! 1011:
! 1012: array_init(return_value);
! 1013:
! 1014: pos = (OnigUChar *)string;
! 1015: err = 0;
! 1016: regs = onig_region_new();
! 1017: /* churn through str, generating array entries as we go */
! 1018: while ((--count != 0) &&
! 1019: (err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
! 1020: if (regs->beg[0] == regs->end[0]) {
! 1021: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
! 1022: break;
! 1023: }
! 1024:
! 1025: /* add it to the array */
! 1026: if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
! 1027: add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
! 1028: } else {
! 1029: err = -2;
! 1030: break;
! 1031: }
! 1032: /* point at our new starting point */
! 1033: n = regs->end[0];
! 1034: if ((pos - (OnigUChar *)string) < n) {
! 1035: pos = (OnigUChar *)string + n;
! 1036: }
! 1037: if (count < 0) {
! 1038: count = 0;
! 1039: }
! 1040: onig_region_free(regs, 0);
! 1041: }
! 1042:
! 1043: onig_region_free(regs, 1);
! 1044:
! 1045: /* see if we encountered an error */
! 1046: if (err <= -2) {
! 1047: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
! 1048: onig_error_code_to_str(err_str, err);
! 1049: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
! 1050: zval_dtor(return_value);
! 1051: RETURN_FALSE;
! 1052: }
! 1053:
! 1054: /* otherwise we just have one last element to add to the array */
! 1055: n = ((OnigUChar *)(string + string_len) - pos);
! 1056: if (n > 0) {
! 1057: add_next_index_stringl(return_value, (char *)pos, n, 1);
! 1058: } else {
! 1059: add_next_index_stringl(return_value, "", 0, 1);
! 1060: }
! 1061: }
! 1062: /* }}} */
! 1063:
! 1064: /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
! 1065: Regular expression match for multibyte string */
! 1066: PHP_FUNCTION(mb_ereg_match)
! 1067: {
! 1068: char *arg_pattern;
! 1069: int arg_pattern_len;
! 1070:
! 1071: char *string;
! 1072: int string_len;
! 1073:
! 1074: php_mb_regex_t *re;
! 1075: OnigSyntaxType *syntax;
! 1076: OnigOptionType option = 0;
! 1077: int err;
! 1078:
! 1079: {
! 1080: char *option_str = NULL;
! 1081: int option_str_len = 0;
! 1082:
! 1083: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
! 1084: &arg_pattern, &arg_pattern_len, &string, &string_len,
! 1085: &option_str, &option_str_len)==FAILURE) {
! 1086: RETURN_FALSE;
! 1087: }
! 1088:
! 1089: if (option_str != NULL) {
! 1090: _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
! 1091: } else {
! 1092: option |= MBREX(regex_default_options);
! 1093: syntax = MBREX(regex_default_syntax);
! 1094: }
! 1095: }
! 1096:
! 1097: if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
! 1098: RETURN_FALSE;
! 1099: }
! 1100:
! 1101: /* match */
! 1102: err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
! 1103: if (err >= 0) {
! 1104: RETVAL_TRUE;
! 1105: } else {
! 1106: RETVAL_FALSE;
! 1107: }
! 1108: }
! 1109: /* }}} */
! 1110:
! 1111: /* regex search */
! 1112: /* {{{ _php_mb_regex_ereg_search_exec */
! 1113: static void
! 1114: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
! 1115: {
! 1116: size_t argc = ZEND_NUM_ARGS();
! 1117: char *arg_pattern, *arg_options;
! 1118: int arg_pattern_len, arg_options_len;
! 1119: int n, i, err, pos, len, beg, end;
! 1120: OnigOptionType option;
! 1121: OnigUChar *str;
! 1122: OnigSyntaxType *syntax;
! 1123:
! 1124: if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
! 1125: return;
! 1126: }
! 1127:
! 1128: option = MBREX(regex_default_options);
! 1129:
! 1130: if (argc == 2) {
! 1131: option = 0;
! 1132: _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
! 1133: }
! 1134:
! 1135: if (argc > 0) {
! 1136: /* create regex pattern buffer */
! 1137: if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
! 1138: RETURN_FALSE;
! 1139: }
! 1140: }
! 1141:
! 1142: pos = MBREX(search_pos);
! 1143: str = NULL;
! 1144: len = 0;
! 1145: if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
! 1146: str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
! 1147: len = Z_STRLEN_P(MBREX(search_str));
! 1148: }
! 1149:
! 1150: if (MBREX(search_re) == NULL) {
! 1151: php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
! 1152: RETURN_FALSE;
! 1153: }
! 1154:
! 1155: if (str == NULL) {
! 1156: php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
! 1157: RETURN_FALSE;
! 1158: }
! 1159:
! 1160: if (MBREX(search_regs)) {
! 1161: onig_region_free(MBREX(search_regs), 1);
! 1162: }
! 1163: MBREX(search_regs) = onig_region_new();
! 1164:
! 1165: err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
! 1166: if (err == ONIG_MISMATCH) {
! 1167: MBREX(search_pos) = len;
! 1168: RETVAL_FALSE;
! 1169: } else if (err <= -2) {
! 1170: OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
! 1171: onig_error_code_to_str(err_str, err);
! 1172: php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
! 1173: RETVAL_FALSE;
! 1174: } else {
! 1175: if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
! 1176: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
! 1177: }
! 1178: switch (mode) {
! 1179: case 1:
! 1180: array_init(return_value);
! 1181: beg = MBREX(search_regs)->beg[0];
! 1182: end = MBREX(search_regs)->end[0];
! 1183: add_next_index_long(return_value, beg);
! 1184: add_next_index_long(return_value, end - beg);
! 1185: break;
! 1186: case 2:
! 1187: array_init(return_value);
! 1188: n = MBREX(search_regs)->num_regs;
! 1189: for (i = 0; i < n; i++) {
! 1190: beg = MBREX(search_regs)->beg[i];
! 1191: end = MBREX(search_regs)->end[i];
! 1192: if (beg >= 0 && beg <= end && end <= len) {
! 1193: add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
! 1194: } else {
! 1195: add_index_bool(return_value, i, 0);
! 1196: }
! 1197: }
! 1198: break;
! 1199: default:
! 1200: RETVAL_TRUE;
! 1201: break;
! 1202: }
! 1203: end = MBREX(search_regs)->end[0];
! 1204: if (pos < end) {
! 1205: MBREX(search_pos) = end;
! 1206: } else {
! 1207: MBREX(search_pos) = pos + 1;
! 1208: }
! 1209: }
! 1210:
! 1211: if (err < 0) {
! 1212: onig_region_free(MBREX(search_regs), 1);
! 1213: MBREX(search_regs) = (OnigRegion *)NULL;
! 1214: }
! 1215: }
! 1216: /* }}} */
! 1217:
! 1218: /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
! 1219: Regular expression search for multibyte string */
! 1220: PHP_FUNCTION(mb_ereg_search)
! 1221: {
! 1222: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
! 1223: }
! 1224: /* }}} */
! 1225:
! 1226: /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
! 1227: Regular expression search for multibyte string */
! 1228: PHP_FUNCTION(mb_ereg_search_pos)
! 1229: {
! 1230: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
! 1231: }
! 1232: /* }}} */
! 1233:
! 1234: /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
! 1235: Regular expression search for multibyte string */
! 1236: PHP_FUNCTION(mb_ereg_search_regs)
! 1237: {
! 1238: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
! 1239: }
! 1240: /* }}} */
! 1241:
! 1242: /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
! 1243: Initialize string and regular expression for search. */
! 1244: PHP_FUNCTION(mb_ereg_search_init)
! 1245: {
! 1246: size_t argc = ZEND_NUM_ARGS();
! 1247: zval *arg_str;
! 1248: char *arg_pattern = NULL, *arg_options = NULL;
! 1249: int arg_pattern_len = 0, arg_options_len = 0;
! 1250: OnigSyntaxType *syntax = NULL;
! 1251: OnigOptionType option;
! 1252:
! 1253: if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
! 1254: return;
! 1255: }
! 1256:
! 1257: if (argc > 1 && arg_pattern_len == 0) {
! 1258: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
! 1259: RETURN_FALSE;
! 1260: }
! 1261:
! 1262: option = MBREX(regex_default_options);
! 1263: syntax = MBREX(regex_default_syntax);
! 1264:
! 1265: if (argc == 3) {
! 1266: option = 0;
! 1267: _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
! 1268: }
! 1269:
! 1270: if (argc > 1) {
! 1271: /* create regex pattern buffer */
! 1272: if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
! 1273: RETURN_FALSE;
! 1274: }
! 1275: }
! 1276:
! 1277: if (MBREX(search_str) != NULL) {
! 1278: zval_ptr_dtor(&MBREX(search_str));
! 1279: MBREX(search_str) = (zval *)NULL;
! 1280: }
! 1281:
! 1282: MBREX(search_str) = arg_str;
! 1283: Z_ADDREF_P(MBREX(search_str));
! 1284: SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
! 1285:
! 1286: MBREX(search_pos) = 0;
! 1287:
! 1288: if (MBREX(search_regs) != NULL) {
! 1289: onig_region_free(MBREX(search_regs), 1);
! 1290: MBREX(search_regs) = (OnigRegion *) NULL;
! 1291: }
! 1292:
! 1293: RETURN_TRUE;
! 1294: }
! 1295: /* }}} */
! 1296:
! 1297: /* {{{ proto array mb_ereg_search_getregs(void)
! 1298: Get matched substring of the last time */
! 1299: PHP_FUNCTION(mb_ereg_search_getregs)
! 1300: {
! 1301: int n, i, len, beg, end;
! 1302: OnigUChar *str;
! 1303:
! 1304: if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
! 1305: array_init(return_value);
! 1306:
! 1307: str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
! 1308: len = Z_STRLEN_P(MBREX(search_str));
! 1309: n = MBREX(search_regs)->num_regs;
! 1310: for (i = 0; i < n; i++) {
! 1311: beg = MBREX(search_regs)->beg[i];
! 1312: end = MBREX(search_regs)->end[i];
! 1313: if (beg >= 0 && beg <= end && end <= len) {
! 1314: add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
! 1315: } else {
! 1316: add_index_bool(return_value, i, 0);
! 1317: }
! 1318: }
! 1319: } else {
! 1320: RETVAL_FALSE;
! 1321: }
! 1322: }
! 1323: /* }}} */
! 1324:
! 1325: /* {{{ proto int mb_ereg_search_getpos(void)
! 1326: Get search start position */
! 1327: PHP_FUNCTION(mb_ereg_search_getpos)
! 1328: {
! 1329: RETVAL_LONG(MBREX(search_pos));
! 1330: }
! 1331: /* }}} */
! 1332:
! 1333: /* {{{ proto bool mb_ereg_search_setpos(int position)
! 1334: Set search start position */
! 1335: PHP_FUNCTION(mb_ereg_search_setpos)
! 1336: {
! 1337: long position;
! 1338:
! 1339: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
! 1340: return;
! 1341: }
! 1342:
! 1343: if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
! 1344: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
! 1345: MBREX(search_pos) = 0;
! 1346: RETURN_FALSE;
! 1347: }
! 1348:
! 1349: MBREX(search_pos) = position;
! 1350: RETURN_TRUE;
! 1351: }
! 1352: /* }}} */
! 1353:
! 1354: /* {{{ php_mb_regex_set_options */
! 1355: static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
! 1356: {
! 1357: if (prev_options != NULL) {
! 1358: *prev_options = MBREX(regex_default_options);
! 1359: }
! 1360: if (prev_syntax != NULL) {
! 1361: *prev_syntax = MBREX(regex_default_syntax);
! 1362: }
! 1363: MBREX(regex_default_options) = options;
! 1364: MBREX(regex_default_syntax) = syntax;
! 1365: }
! 1366: /* }}} */
! 1367:
! 1368: /* {{{ proto string mb_regex_set_options([string options])
! 1369: Set or get the default options for mbregex functions */
! 1370: PHP_FUNCTION(mb_regex_set_options)
! 1371: {
! 1372: OnigOptionType opt;
! 1373: OnigSyntaxType *syntax;
! 1374: char *string = NULL;
! 1375: int string_len;
! 1376: char buf[16];
! 1377:
! 1378: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
! 1379: &string, &string_len) == FAILURE) {
! 1380: RETURN_FALSE;
! 1381: }
! 1382: if (string != NULL) {
! 1383: opt = 0;
! 1384: syntax = NULL;
! 1385: _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
! 1386: _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
! 1387: } else {
! 1388: opt = MBREX(regex_default_options);
! 1389: syntax = MBREX(regex_default_syntax);
! 1390: }
! 1391: _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
! 1392:
! 1393: RETVAL_STRING(buf, 1);
! 1394: }
! 1395: /* }}} */
! 1396:
! 1397: #endif /* HAVE_MBREGEX */
! 1398:
! 1399: /*
! 1400: * Local variables:
! 1401: * tab-width: 4
! 1402: * c-basic-offset: 4
! 1403: * End:
! 1404: * vim600: fdm=marker
! 1405: * vim: noet sw=4 ts=4
! 1406: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>