Return to php_pcre.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / pcre |
1.1 ! misho 1: /* ! 2: +----------------------------------------------------------------------+ ! 3: | PHP Version 5 | ! 4: +----------------------------------------------------------------------+ ! 5: | Copyright (c) 1997-2012 The PHP Group | ! 6: +----------------------------------------------------------------------+ ! 7: | This source file is subject to version 3.01 of the PHP license, | ! 8: | that is bundled with this package in the file LICENSE, and is | ! 9: | available through the world-wide-web at the following url: | ! 10: | http://www.php.net/license/3_01.txt | ! 11: | If you did not receive a copy of the PHP license and are unable to | ! 12: | obtain it through the world-wide-web, please send a note to | ! 13: | license@php.net so we can mail you a copy immediately. | ! 14: +----------------------------------------------------------------------+ ! 15: | Author: Andrei Zmievski <andrei@php.net> | ! 16: +----------------------------------------------------------------------+ ! 17: */ ! 18: ! 19: /* $Id: php_pcre.c 321634 2012-01-01 13:15:04Z felipe $ */ ! 20: ! 21: #include "php.h" ! 22: #include "php_ini.h" ! 23: #include "php_globals.h" ! 24: #include "php_pcre.h" ! 25: #include "ext/standard/info.h" ! 26: #include "ext/standard/php_smart_str.h" ! 27: ! 28: #if HAVE_PCRE || HAVE_BUNDLED_PCRE ! 29: ! 30: #include "ext/standard/php_string.h" ! 31: ! 32: #define PREG_PATTERN_ORDER 1 ! 33: #define PREG_SET_ORDER 2 ! 34: #define PREG_OFFSET_CAPTURE (1<<8) ! 35: ! 36: #define PREG_SPLIT_NO_EMPTY (1<<0) ! 37: #define PREG_SPLIT_DELIM_CAPTURE (1<<1) ! 38: #define PREG_SPLIT_OFFSET_CAPTURE (1<<2) ! 39: ! 40: #define PREG_REPLACE_EVAL (1<<0) ! 41: ! 42: #define PREG_GREP_INVERT (1<<0) ! 43: ! 44: #define PCRE_CACHE_SIZE 4096 ! 45: ! 46: enum { ! 47: PHP_PCRE_NO_ERROR = 0, ! 48: PHP_PCRE_INTERNAL_ERROR, ! 49: PHP_PCRE_BACKTRACK_LIMIT_ERROR, ! 50: PHP_PCRE_RECURSION_LIMIT_ERROR, ! 51: PHP_PCRE_BAD_UTF8_ERROR, ! 52: PHP_PCRE_BAD_UTF8_OFFSET_ERROR ! 53: }; ! 54: ! 55: ! 56: ZEND_DECLARE_MODULE_GLOBALS(pcre) ! 57: ! 58: ! 59: static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ ! 60: { ! 61: int preg_code = 0; ! 62: ! 63: switch (pcre_code) { ! 64: case PCRE_ERROR_MATCHLIMIT: ! 65: preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR; ! 66: break; ! 67: ! 68: case PCRE_ERROR_RECURSIONLIMIT: ! 69: preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR; ! 70: break; ! 71: ! 72: case PCRE_ERROR_BADUTF8: ! 73: preg_code = PHP_PCRE_BAD_UTF8_ERROR; ! 74: break; ! 75: ! 76: case PCRE_ERROR_BADUTF8_OFFSET: ! 77: preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; ! 78: break; ! 79: ! 80: default: ! 81: preg_code = PHP_PCRE_INTERNAL_ERROR; ! 82: break; ! 83: } ! 84: ! 85: PCRE_G(error_code) = preg_code; ! 86: } ! 87: /* }}} */ ! 88: ! 89: static void php_free_pcre_cache(void *data) /* {{{ */ ! 90: { ! 91: pcre_cache_entry *pce = (pcre_cache_entry *) data; ! 92: if (!pce) return; ! 93: pefree(pce->re, 1); ! 94: if (pce->extra) pefree(pce->extra, 1); ! 95: #if HAVE_SETLOCALE ! 96: if ((void*)pce->tables) pefree((void*)pce->tables, 1); ! 97: pefree(pce->locale, 1); ! 98: #endif ! 99: } ! 100: /* }}} */ ! 101: ! 102: static PHP_GINIT_FUNCTION(pcre) /* {{{ */ ! 103: { ! 104: zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); ! 105: pcre_globals->backtrack_limit = 0; ! 106: pcre_globals->recursion_limit = 0; ! 107: pcre_globals->error_code = PHP_PCRE_NO_ERROR; ! 108: } ! 109: /* }}} */ ! 110: ! 111: static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */ ! 112: { ! 113: zend_hash_destroy(&pcre_globals->pcre_cache); ! 114: } ! 115: /* }}} */ ! 116: ! 117: PHP_INI_BEGIN() ! 118: STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals) ! 119: STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals) ! 120: PHP_INI_END() ! 121: ! 122: ! 123: /* {{{ PHP_MINFO_FUNCTION(pcre) */ ! 124: static PHP_MINFO_FUNCTION(pcre) ! 125: { ! 126: php_info_print_table_start(); ! 127: php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" ); ! 128: php_info_print_table_row(2, "PCRE Library Version", pcre_version() ); ! 129: php_info_print_table_end(); ! 130: ! 131: DISPLAY_INI_ENTRIES(); ! 132: } ! 133: /* }}} */ ! 134: ! 135: /* {{{ PHP_MINIT_FUNCTION(pcre) */ ! 136: static PHP_MINIT_FUNCTION(pcre) ! 137: { ! 138: REGISTER_INI_ENTRIES(); ! 139: ! 140: REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); ! 141: REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); ! 142: REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); ! 143: REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); ! 144: REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); ! 145: REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); ! 146: REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT); ! 147: ! 148: REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT); ! 149: REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT); ! 150: REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); ! 151: REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); ! 152: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); ! 153: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); ! 154: REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); ! 155: ! 156: return SUCCESS; ! 157: } ! 158: /* }}} */ ! 159: ! 160: /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */ ! 161: static PHP_MSHUTDOWN_FUNCTION(pcre) ! 162: { ! 163: UNREGISTER_INI_ENTRIES(); ! 164: ! 165: return SUCCESS; ! 166: } ! 167: /* }}} */ ! 168: ! 169: /* {{{ static pcre_clean_cache */ ! 170: static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) ! 171: { ! 172: int *num_clean = (int *)arg; ! 173: ! 174: if (*num_clean > 0) { ! 175: (*num_clean)--; ! 176: return 1; ! 177: } else { ! 178: return 0; ! 179: } ! 180: } ! 181: /* }}} */ ! 182: ! 183: /* {{{ static make_subpats_table */ ! 184: static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC) ! 185: { ! 186: pcre_extra *extra = pce->extra; ! 187: int name_cnt = 0, name_size, ni = 0; ! 188: int rc; ! 189: char *name_table; ! 190: unsigned short name_idx; ! 191: char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); ! 192: ! 193: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); ! 194: if (rc < 0) { ! 195: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); ! 196: efree(subpat_names); ! 197: return NULL; ! 198: } ! 199: if (name_cnt > 0) { ! 200: int rc1, rc2; ! 201: ! 202: rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); ! 203: rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); ! 204: rc = rc2 ? rc2 : rc1; ! 205: if (rc < 0) { ! 206: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); ! 207: efree(subpat_names); ! 208: return NULL; ! 209: } ! 210: ! 211: while (ni++ < name_cnt) { ! 212: name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1]; ! 213: subpat_names[name_idx] = name_table + 2; ! 214: if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { ! 215: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); ! 216: efree(subpat_names); ! 217: return NULL; ! 218: } ! 219: name_table += name_size; ! 220: } ! 221: } ! 222: ! 223: return subpat_names; ! 224: } ! 225: /* }}} */ ! 226: ! 227: /* {{{ pcre_get_compiled_regex_cache ! 228: */ ! 229: PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) ! 230: { ! 231: pcre *re = NULL; ! 232: pcre_extra *extra; ! 233: int coptions = 0; ! 234: int soptions = 0; ! 235: const char *error; ! 236: int erroffset; ! 237: char delimiter; ! 238: char start_delimiter; ! 239: char end_delimiter; ! 240: char *p, *pp; ! 241: char *pattern; ! 242: int do_study = 0; ! 243: int poptions = 0; ! 244: int count = 0; ! 245: unsigned const char *tables = NULL; ! 246: #if HAVE_SETLOCALE ! 247: char *locale = setlocale(LC_CTYPE, NULL); ! 248: #endif ! 249: pcre_cache_entry *pce; ! 250: pcre_cache_entry new_entry; ! 251: ! 252: /* Try to lookup the cached regex entry, and if successful, just pass ! 253: back the compiled pattern, otherwise go on and compile it. */ ! 254: if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) { ! 255: /* ! 256: * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it ! 257: * is, we flush it and compile the pattern from scratch. ! 258: */ ! 259: if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) { ! 260: zend_hash_clean(&PCRE_G(pcre_cache)); ! 261: } else { ! 262: #if HAVE_SETLOCALE ! 263: if (!strcmp(pce->locale, locale)) { ! 264: #endif ! 265: return pce; ! 266: #if HAVE_SETLOCALE ! 267: } ! 268: #endif ! 269: } ! 270: } ! 271: ! 272: p = regex; ! 273: ! 274: /* Parse through the leading whitespace, and display a warning if we ! 275: get to the end without encountering a delimiter. */ ! 276: while (isspace((int)*(unsigned char *)p)) p++; ! 277: if (*p == 0) { ! 278: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); ! 279: return NULL; ! 280: } ! 281: ! 282: /* Get the delimiter and display a warning if it is alphanumeric ! 283: or a backslash. */ ! 284: delimiter = *p++; ! 285: if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') { ! 286: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash"); ! 287: return NULL; ! 288: } ! 289: ! 290: start_delimiter = delimiter; ! 291: if ((pp = strchr("([{< )]}> )]}>", delimiter))) ! 292: delimiter = pp[5]; ! 293: end_delimiter = delimiter; ! 294: ! 295: if (start_delimiter == end_delimiter) { ! 296: /* We need to iterate through the pattern, searching for the ending delimiter, ! 297: but skipping the backslashed delimiters. If the ending delimiter is not ! 298: found, display a warning. */ ! 299: pp = p; ! 300: while (*pp != 0) { ! 301: if (*pp == '\\' && pp[1] != 0) pp++; ! 302: else if (*pp == delimiter) ! 303: break; ! 304: pp++; ! 305: } ! 306: if (*pp == 0) { ! 307: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter); ! 308: return NULL; ! 309: } ! 310: } else { ! 311: /* We iterate through the pattern, searching for the matching ending ! 312: * delimiter. For each matching starting delimiter, we increment nesting ! 313: * level, and decrement it for each matching ending delimiter. If we ! 314: * reach the end of the pattern without matching, display a warning. ! 315: */ ! 316: int brackets = 1; /* brackets nesting level */ ! 317: pp = p; ! 318: while (*pp != 0) { ! 319: if (*pp == '\\' && pp[1] != 0) pp++; ! 320: else if (*pp == end_delimiter && --brackets <= 0) ! 321: break; ! 322: else if (*pp == start_delimiter) ! 323: brackets++; ! 324: pp++; ! 325: } ! 326: if (*pp == 0) { ! 327: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter); ! 328: return NULL; ! 329: } ! 330: } ! 331: ! 332: /* Make a copy of the actual pattern. */ ! 333: pattern = estrndup(p, pp-p); ! 334: ! 335: /* Move on to the options */ ! 336: pp++; ! 337: ! 338: /* Parse through the options, setting appropriate flags. Display ! 339: a warning if we encounter an unknown modifier. */ ! 340: while (*pp != 0) { ! 341: switch (*pp++) { ! 342: /* Perl compatible options */ ! 343: case 'i': coptions |= PCRE_CASELESS; break; ! 344: case 'm': coptions |= PCRE_MULTILINE; break; ! 345: case 's': coptions |= PCRE_DOTALL; break; ! 346: case 'x': coptions |= PCRE_EXTENDED; break; ! 347: ! 348: /* PCRE specific options */ ! 349: case 'A': coptions |= PCRE_ANCHORED; break; ! 350: case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break; ! 351: case 'S': do_study = 1; break; ! 352: case 'U': coptions |= PCRE_UNGREEDY; break; ! 353: case 'X': coptions |= PCRE_EXTRA; break; ! 354: case 'u': coptions |= PCRE_UTF8; ! 355: /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII ! 356: characters, even in UTF-8 mode. However, this can be changed by setting ! 357: the PCRE_UCP option. */ ! 358: #ifdef PCRE_UCP ! 359: coptions |= PCRE_UCP; ! 360: #endif ! 361: break; ! 362: ! 363: /* Custom preg options */ ! 364: case 'e': poptions |= PREG_REPLACE_EVAL; break; ! 365: ! 366: case ' ': ! 367: case '\n': ! 368: break; ! 369: ! 370: default: ! 371: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]); ! 372: efree(pattern); ! 373: return NULL; ! 374: } ! 375: } ! 376: ! 377: #if HAVE_SETLOCALE ! 378: if (strcmp(locale, "C")) ! 379: tables = pcre_maketables(); ! 380: #endif ! 381: ! 382: /* Compile pattern and display a warning if compilation failed. */ ! 383: re = pcre_compile(pattern, ! 384: coptions, ! 385: &error, ! 386: &erroffset, ! 387: tables); ! 388: ! 389: if (re == NULL) { ! 390: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset); ! 391: efree(pattern); ! 392: if (tables) { ! 393: pefree((void*)tables, 1); ! 394: } ! 395: return NULL; ! 396: } ! 397: ! 398: /* If study option was specified, study the pattern and ! 399: store the result in extra for passing to pcre_exec. */ ! 400: if (do_study) { ! 401: extra = pcre_study(re, soptions, &error); ! 402: if (extra) { ! 403: extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; ! 404: } ! 405: if (error != NULL) { ! 406: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern"); ! 407: } ! 408: } else { ! 409: extra = NULL; ! 410: } ! 411: ! 412: efree(pattern); ! 413: ! 414: /* ! 415: * If we reached cache limit, clean out the items from the head of the list; ! 416: * these are supposedly the oldest ones (but not necessarily the least used ! 417: * ones). ! 418: */ ! 419: if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) { ! 420: int num_clean = PCRE_CACHE_SIZE / 8; ! 421: zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC); ! 422: } ! 423: ! 424: /* Store the compiled pattern and extra info in the cache. */ ! 425: new_entry.re = re; ! 426: new_entry.extra = extra; ! 427: new_entry.preg_options = poptions; ! 428: new_entry.compile_options = coptions; ! 429: #if HAVE_SETLOCALE ! 430: new_entry.locale = pestrdup(locale, 1); ! 431: new_entry.tables = tables; ! 432: #endif ! 433: zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry, ! 434: sizeof(pcre_cache_entry), (void**)&pce); ! 435: ! 436: return pce; ! 437: } ! 438: /* }}} */ ! 439: ! 440: /* {{{ pcre_get_compiled_regex ! 441: */ ! 442: PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) ! 443: { ! 444: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); ! 445: ! 446: if (extra) { ! 447: *extra = pce ? pce->extra : NULL; ! 448: } ! 449: if (preg_options) { ! 450: *preg_options = pce ? pce->preg_options : 0; ! 451: } ! 452: ! 453: return pce ? pce->re : NULL; ! 454: } ! 455: /* }}} */ ! 456: ! 457: /* {{{ pcre_get_compiled_regex_ex ! 458: */ ! 459: PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) ! 460: { ! 461: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); ! 462: ! 463: if (extra) { ! 464: *extra = pce ? pce->extra : NULL; ! 465: } ! 466: if (preg_options) { ! 467: *preg_options = pce ? pce->preg_options : 0; ! 468: } ! 469: if (compile_options) { ! 470: *compile_options = pce ? pce->compile_options : 0; ! 471: } ! 472: ! 473: return pce ? pce->re : NULL; ! 474: } ! 475: /* }}} */ ! 476: ! 477: /* {{{ add_offset_pair */ ! 478: static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) ! 479: { ! 480: zval *match_pair; ! 481: ! 482: ALLOC_ZVAL(match_pair); ! 483: array_init(match_pair); ! 484: INIT_PZVAL(match_pair); ! 485: ! 486: /* Add (match, offset) to the return value */ ! 487: add_next_index_stringl(match_pair, str, len, 1); ! 488: add_next_index_long(match_pair, offset); ! 489: ! 490: if (name) { ! 491: zval_add_ref(&match_pair); ! 492: zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); ! 493: } ! 494: zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); ! 495: } ! 496: /* }}} */ ! 497: ! 498: static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */ ! 499: { ! 500: /* parameters */ ! 501: char *regex; /* Regular expression */ ! 502: char *subject; /* String to match against */ ! 503: int regex_len; ! 504: int subject_len; ! 505: pcre_cache_entry *pce; /* Compiled regular expression */ ! 506: zval *subpats = NULL; /* Array for subpatterns */ ! 507: long flags = 0; /* Match control flags */ ! 508: long start_offset = 0; /* Where the new search starts */ ! 509: ! 510: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len, ! 511: &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { ! 512: RETURN_FALSE; ! 513: } ! 514: ! 515: /* Compile regex or get it from cache. */ ! 516: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { ! 517: RETURN_FALSE; ! 518: } ! 519: ! 520: php_pcre_match_impl(pce, subject, subject_len, return_value, subpats, ! 521: global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC); ! 522: } ! 523: /* }}} */ ! 524: ! 525: /* {{{ php_pcre_match_impl() */ ! 526: PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, ! 527: zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC) ! 528: { ! 529: zval *result_set, /* Holds a set of subpatterns after ! 530: a global match */ ! 531: **match_sets = NULL; /* An array of sets of matches for each ! 532: subpattern after a global match */ ! 533: pcre_extra *extra = pce->extra;/* Holds results of studying */ ! 534: pcre_extra extra_data; /* Used locally for exec options */ ! 535: int exoptions = 0; /* Execution options */ ! 536: int count = 0; /* Count of matched subpatterns */ ! 537: int *offsets; /* Array of subpattern offsets */ ! 538: int num_subpats; /* Number of captured subpatterns */ ! 539: int size_offsets; /* Size of the offsets array */ ! 540: int matched; /* Has anything matched */ ! 541: int g_notempty = 0; /* If the match should not be empty */ ! 542: const char **stringlist; /* Holds list of subpatterns */ ! 543: char **subpat_names; /* Array for named subpatterns */ ! 544: int i, rc; ! 545: int subpats_order; /* Order of subpattern matches */ ! 546: int offset_capture; /* Capture match offsets: yes/no */ ! 547: ! 548: /* Overwrite the passed-in value for subpatterns with an empty array. */ ! 549: if (subpats != NULL) { ! 550: zval_dtor(subpats); ! 551: array_init(subpats); ! 552: } ! 553: ! 554: subpats_order = global ? PREG_PATTERN_ORDER : 0; ! 555: ! 556: if (use_flags) { ! 557: offset_capture = flags & PREG_OFFSET_CAPTURE; ! 558: ! 559: /* ! 560: * subpats_order is pre-set to pattern mode so we change it only if ! 561: * necessary. ! 562: */ ! 563: if (flags & 0xff) { ! 564: subpats_order = flags & 0xff; ! 565: } ! 566: if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || ! 567: (!global && subpats_order != 0)) { ! 568: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified"); ! 569: return; ! 570: } ! 571: } else { ! 572: offset_capture = 0; ! 573: } ! 574: ! 575: /* Negative offset counts from the end of the string. */ ! 576: if (start_offset < 0) { ! 577: start_offset = subject_len + start_offset; ! 578: if (start_offset < 0) { ! 579: start_offset = 0; ! 580: } ! 581: } ! 582: ! 583: if (extra == NULL) { ! 584: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; ! 585: extra = &extra_data; ! 586: } ! 587: extra->match_limit = PCRE_G(backtrack_limit); ! 588: extra->match_limit_recursion = PCRE_G(recursion_limit); ! 589: ! 590: /* Calculate the size of the offsets array, and allocate memory for it. */ ! 591: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); ! 592: if (rc < 0) { ! 593: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); ! 594: RETURN_FALSE; ! 595: } ! 596: num_subpats++; ! 597: size_offsets = num_subpats * 3; ! 598: ! 599: /* ! 600: * Build a mapping from subpattern numbers to their names. We will always ! 601: * allocate the table, even though there may be no named subpatterns. This ! 602: * avoids somewhat more complicated logic in the inner loops. ! 603: */ ! 604: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); ! 605: if (!subpat_names) { ! 606: RETURN_FALSE; ! 607: } ! 608: ! 609: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); ! 610: ! 611: /* Allocate match sets array and initialize the values. */ ! 612: if (global && subpats_order == PREG_PATTERN_ORDER) { ! 613: match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); ! 614: for (i=0; i<num_subpats; i++) { ! 615: ALLOC_ZVAL(match_sets[i]); ! 616: array_init(match_sets[i]); ! 617: INIT_PZVAL(match_sets[i]); ! 618: } ! 619: } ! 620: ! 621: matched = 0; ! 622: PCRE_G(error_code) = PHP_PCRE_NO_ERROR; ! 623: ! 624: do { ! 625: /* Execute the regular expression. */ ! 626: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, ! 627: exoptions|g_notempty, offsets, size_offsets); ! 628: ! 629: /* the string was already proved to be valid UTF-8 */ ! 630: exoptions |= PCRE_NO_UTF8_CHECK; ! 631: ! 632: /* Check for too many substrings condition. */ ! 633: if (count == 0) { ! 634: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); ! 635: count = size_offsets/3; ! 636: } ! 637: ! 638: /* If something has matched */ ! 639: if (count > 0) { ! 640: matched++; ! 641: ! 642: /* If subpatterns array has been passed, fill it in with values. */ ! 643: if (subpats != NULL) { ! 644: /* Try to get the list of substrings and display a warning if failed. */ ! 645: if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { ! 646: efree(subpat_names); ! 647: efree(offsets); ! 648: if (match_sets) efree(match_sets); ! 649: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed"); ! 650: RETURN_FALSE; ! 651: } ! 652: ! 653: if (global) { /* global pattern matching */ ! 654: if (subpats_order == PREG_PATTERN_ORDER) { ! 655: /* For each subpattern, insert it into the appropriate array. */ ! 656: for (i = 0; i < count; i++) { ! 657: if (offset_capture) { ! 658: add_offset_pair(match_sets[i], (char *)stringlist[i], ! 659: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); ! 660: } else { ! 661: add_next_index_stringl(match_sets[i], (char *)stringlist[i], ! 662: offsets[(i<<1)+1] - offsets[i<<1], 1); ! 663: } ! 664: } ! 665: /* ! 666: * If the number of captured subpatterns on this run is ! 667: * less than the total possible number, pad the result ! 668: * arrays with empty strings. ! 669: */ ! 670: if (count < num_subpats) { ! 671: for (; i < num_subpats; i++) { ! 672: add_next_index_string(match_sets[i], "", 1); ! 673: } ! 674: } ! 675: } else { ! 676: /* Allocate the result set array */ ! 677: ALLOC_ZVAL(result_set); ! 678: array_init(result_set); ! 679: INIT_PZVAL(result_set); ! 680: ! 681: /* Add all the subpatterns to it */ ! 682: for (i = 0; i < count; i++) { ! 683: if (offset_capture) { ! 684: add_offset_pair(result_set, (char *)stringlist[i], ! 685: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); ! 686: } else { ! 687: if (subpat_names[i]) { ! 688: add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i], ! 689: offsets[(i<<1)+1] - offsets[i<<1], 1); ! 690: } ! 691: add_next_index_stringl(result_set, (char *)stringlist[i], ! 692: offsets[(i<<1)+1] - offsets[i<<1], 1); ! 693: } ! 694: } ! 695: /* And add it to the output array */ ! 696: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); ! 697: } ! 698: } else { /* single pattern matching */ ! 699: /* For each subpattern, insert it into the subpatterns array. */ ! 700: for (i = 0; i < count; i++) { ! 701: if (offset_capture) { ! 702: add_offset_pair(subpats, (char *)stringlist[i], ! 703: offsets[(i<<1)+1] - offsets[i<<1], ! 704: offsets[i<<1], subpat_names[i]); ! 705: } else { ! 706: if (subpat_names[i]) { ! 707: add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], ! 708: offsets[(i<<1)+1] - offsets[i<<1], 1); ! 709: } ! 710: add_next_index_stringl(subpats, (char *)stringlist[i], ! 711: offsets[(i<<1)+1] - offsets[i<<1], 1); ! 712: } ! 713: } ! 714: } ! 715: ! 716: pcre_free((void *) stringlist); ! 717: } ! 718: } else if (count == PCRE_ERROR_NOMATCH) { ! 719: /* If we previously set PCRE_NOTEMPTY after a null match, ! 720: this is not necessarily the end. We need to advance ! 721: the start offset, and continue. Fudge the offset values ! 722: to achieve this, unless we're already at the end of the string. */ ! 723: if (g_notempty != 0 && start_offset < subject_len) { ! 724: offsets[0] = start_offset; ! 725: offsets[1] = start_offset + 1; ! 726: } else ! 727: break; ! 728: } else { ! 729: pcre_handle_exec_error(count TSRMLS_CC); ! 730: break; ! 731: } ! 732: ! 733: /* If we have matched an empty string, mimic what Perl's /g options does. ! 734: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try ! 735: the match again at the same point. If this fails (picked up above) we ! 736: advance to the next character. */ ! 737: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; ! 738: ! 739: /* Advance to the position right after the last full match */ ! 740: start_offset = offsets[1]; ! 741: } while (global); ! 742: ! 743: /* Add the match sets to the output array and clean up */ ! 744: if (global && subpats_order == PREG_PATTERN_ORDER) { ! 745: for (i = 0; i < num_subpats; i++) { ! 746: if (subpat_names[i]) { ! 747: zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], ! 748: strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL); ! 749: Z_ADDREF_P(match_sets[i]); ! 750: } ! 751: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); ! 752: } ! 753: efree(match_sets); ! 754: } ! 755: ! 756: efree(offsets); ! 757: efree(subpat_names); ! 758: ! 759: /* Did we encounter an error? */ ! 760: if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) { ! 761: RETVAL_LONG(matched); ! 762: } else { ! 763: RETVAL_FALSE; ! 764: } ! 765: } ! 766: /* }}} */ ! 767: ! 768: /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]]) ! 769: Perform a Perl-style regular expression match */ ! 770: static PHP_FUNCTION(preg_match) ! 771: { ! 772: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); ! 773: } ! 774: /* }}} */ ! 775: ! 776: /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]]) ! 777: Perform a Perl-style global regular expression match */ ! 778: static PHP_FUNCTION(preg_match_all) ! 779: { ! 780: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); ! 781: } ! 782: /* }}} */ ! 783: ! 784: /* {{{ preg_get_backref ! 785: */ ! 786: static int preg_get_backref(char **str, int *backref) ! 787: { ! 788: register char in_brace = 0; ! 789: register char *walk = *str; ! 790: ! 791: if (walk[1] == 0) ! 792: return 0; ! 793: ! 794: if (*walk == '$' && walk[1] == '{') { ! 795: in_brace = 1; ! 796: walk++; ! 797: } ! 798: walk++; ! 799: ! 800: if (*walk >= '0' && *walk <= '9') { ! 801: *backref = *walk - '0'; ! 802: walk++; ! 803: } else ! 804: return 0; ! 805: ! 806: if (*walk && *walk >= '0' && *walk <= '9') { ! 807: *backref = *backref * 10 + *walk - '0'; ! 808: walk++; ! 809: } ! 810: ! 811: if (in_brace) { ! 812: if (*walk == 0 || *walk != '}') ! 813: return 0; ! 814: else ! 815: walk++; ! 816: } ! 817: ! 818: *str = walk; ! 819: return 1; ! 820: } ! 821: /* }}} */ ! 822: ! 823: /* {{{ preg_do_repl_func ! 824: */ ! 825: static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) ! 826: { ! 827: zval *retval_ptr; /* Function return value */ ! 828: zval **args[1]; /* Argument to pass to function */ ! 829: zval *subpats; /* Captured subpatterns */ ! 830: int result_len; /* Return value length */ ! 831: int i; ! 832: ! 833: MAKE_STD_ZVAL(subpats); ! 834: array_init(subpats); ! 835: for (i = 0; i < count; i++) { ! 836: if (subpat_names[i]) { ! 837: add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1); ! 838: } ! 839: add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); ! 840: } ! 841: args[0] = &subpats; ! 842: ! 843: if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { ! 844: convert_to_string_ex(&retval_ptr); ! 845: *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); ! 846: result_len = Z_STRLEN_P(retval_ptr); ! 847: zval_ptr_dtor(&retval_ptr); ! 848: } else { ! 849: if (!EG(exception)) { ! 850: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function"); ! 851: } ! 852: result_len = offsets[1] - offsets[0]; ! 853: *result = estrndup(&subject[offsets[0]], result_len); ! 854: } ! 855: ! 856: zval_ptr_dtor(&subpats); ! 857: ! 858: return result_len; ! 859: } ! 860: /* }}} */ ! 861: ! 862: /* {{{ preg_do_eval ! 863: */ ! 864: static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, ! 865: int *offsets, int count, char **result TSRMLS_DC) ! 866: { ! 867: zval retval; /* Return value from evaluation */ ! 868: char *eval_str_end, /* End of eval string */ ! 869: *match, /* Current match for a backref */ ! 870: *esc_match, /* Quote-escaped match */ ! 871: *walk, /* Used to walk the code string */ ! 872: *segment, /* Start of segment to append while walking */ ! 873: walk_last; /* Last walked character */ ! 874: int match_len; /* Length of the match */ ! 875: int esc_match_len; /* Length of the quote-escaped match */ ! 876: int result_len; /* Length of the result of the evaluation */ ! 877: int backref; /* Current backref */ ! 878: char *compiled_string_description; ! 879: smart_str code = {0}; ! 880: ! 881: eval_str_end = eval_str + eval_str_len; ! 882: walk = segment = eval_str; ! 883: walk_last = 0; ! 884: ! 885: while (walk < eval_str_end) { ! 886: /* If found a backreference.. */ ! 887: if ('\\' == *walk || '$' == *walk) { ! 888: smart_str_appendl(&code, segment, walk - segment); ! 889: if (walk_last == '\\') { ! 890: code.c[code.len-1] = *walk++; ! 891: segment = walk; ! 892: walk_last = 0; ! 893: continue; ! 894: } ! 895: segment = walk; ! 896: if (preg_get_backref(&walk, &backref)) { ! 897: if (backref < count) { ! 898: /* Find the corresponding string match and substitute it ! 899: in instead of the backref */ ! 900: match = subject + offsets[backref<<1]; ! 901: match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; ! 902: if (match_len) { ! 903: esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC); ! 904: } else { ! 905: esc_match = match; ! 906: esc_match_len = 0; ! 907: } ! 908: } else { ! 909: esc_match = ""; ! 910: esc_match_len = 0; ! 911: } ! 912: smart_str_appendl(&code, esc_match, esc_match_len); ! 913: ! 914: segment = walk; ! 915: ! 916: /* Clean up and reassign */ ! 917: if (esc_match_len) ! 918: efree(esc_match); ! 919: continue; ! 920: } ! 921: } ! 922: walk++; ! 923: walk_last = walk[-1]; ! 924: } ! 925: smart_str_appendl(&code, segment, walk - segment); ! 926: smart_str_0(&code); ! 927: ! 928: compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC); ! 929: /* Run the code */ ! 930: if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) { ! 931: efree(compiled_string_description); ! 932: php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c); ! 933: /* zend_error() does not return in this case */ ! 934: } ! 935: efree(compiled_string_description); ! 936: convert_to_string(&retval); ! 937: ! 938: /* Save the return value and its length */ ! 939: *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval)); ! 940: result_len = Z_STRLEN(retval); ! 941: ! 942: /* Clean up */ ! 943: zval_dtor(&retval); ! 944: smart_str_free(&code); ! 945: ! 946: return result_len; ! 947: } ! 948: /* }}} */ ! 949: ! 950: /* {{{ php_pcre_replace ! 951: */ ! 952: PHPAPI char *php_pcre_replace(char *regex, int regex_len, ! 953: char *subject, int subject_len, ! 954: zval *replace_val, int is_callable_replace, ! 955: int *result_len, int limit, int *replace_count TSRMLS_DC) ! 956: { ! 957: pcre_cache_entry *pce; /* Compiled regular expression */ ! 958: ! 959: /* Compile regex or get it from cache. */ ! 960: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { ! 961: return NULL; ! 962: } ! 963: ! 964: return php_pcre_replace_impl(pce, subject, subject_len, replace_val, ! 965: is_callable_replace, result_len, limit, replace_count TSRMLS_CC); ! 966: } ! 967: /* }}} */ ! 968: ! 969: /* {{{ php_pcre_replace_impl() */ ! 970: PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, ! 971: int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC) ! 972: { ! 973: pcre_extra *extra = pce->extra;/* Holds results of studying */ ! 974: pcre_extra extra_data; /* Used locally for exec options */ ! 975: int exoptions = 0; /* Execution options */ ! 976: int count = 0; /* Count of matched subpatterns */ ! 977: int *offsets; /* Array of subpattern offsets */ ! 978: char **subpat_names; /* Array for named subpatterns */ ! 979: int num_subpats; /* Number of captured subpatterns */ ! 980: int size_offsets; /* Size of the offsets array */ ! 981: int new_len; /* Length of needed storage */ ! 982: int alloc_len; /* Actual allocated length */ ! 983: int eval_result_len=0; /* Length of the eval'ed or ! 984: function-returned string */ ! 985: int match_len; /* Length of the current match */ ! 986: int backref; /* Backreference number */ ! 987: int eval; /* If the replacement string should be eval'ed */ ! 988: int start_offset; /* Where the new search starts */ ! 989: int g_notempty=0; /* If the match should not be empty */ ! 990: int replace_len=0; /* Length of replacement string */ ! 991: char *result, /* Result of replacement */ ! 992: *replace=NULL, /* Replacement string */ ! 993: *new_buf, /* Temporary buffer for re-allocation */ ! 994: *walkbuf, /* Location of current replacement in the result */ ! 995: *walk, /* Used to walk the replacement string */ ! 996: *match, /* The current match */ ! 997: *piece, /* The current piece of subject */ ! 998: *replace_end=NULL, /* End of replacement string */ ! 999: *eval_result, /* Result of eval or custom function */ ! 1000: walk_last; /* Last walked character */ ! 1001: int rc; ! 1002: ! 1003: if (extra == NULL) { ! 1004: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; ! 1005: extra = &extra_data; ! 1006: } ! 1007: extra->match_limit = PCRE_G(backtrack_limit); ! 1008: extra->match_limit_recursion = PCRE_G(recursion_limit); ! 1009: ! 1010: eval = pce->preg_options & PREG_REPLACE_EVAL; ! 1011: if (is_callable_replace) { ! 1012: if (eval) { ! 1013: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback"); ! 1014: return NULL; ! 1015: } ! 1016: } else { ! 1017: replace = Z_STRVAL_P(replace_val); ! 1018: replace_len = Z_STRLEN_P(replace_val); ! 1019: replace_end = replace + replace_len; ! 1020: } ! 1021: ! 1022: /* Calculate the size of the offsets array, and allocate memory for it. */ ! 1023: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); ! 1024: if (rc < 0) { ! 1025: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); ! 1026: return NULL; ! 1027: } ! 1028: num_subpats++; ! 1029: size_offsets = num_subpats * 3; ! 1030: ! 1031: /* ! 1032: * Build a mapping from subpattern numbers to their names. We will always ! 1033: * allocate the table, even though there may be no named subpatterns. This ! 1034: * avoids somewhat more complicated logic in the inner loops. ! 1035: */ ! 1036: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); ! 1037: if (!subpat_names) { ! 1038: return NULL; ! 1039: } ! 1040: ! 1041: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); ! 1042: ! 1043: alloc_len = 2 * subject_len + 1; ! 1044: result = safe_emalloc(alloc_len, sizeof(char), 0); ! 1045: ! 1046: /* Initialize */ ! 1047: match = NULL; ! 1048: *result_len = 0; ! 1049: start_offset = 0; ! 1050: PCRE_G(error_code) = PHP_PCRE_NO_ERROR; ! 1051: ! 1052: while (1) { ! 1053: /* Execute the regular expression. */ ! 1054: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, ! 1055: exoptions|g_notempty, offsets, size_offsets); ! 1056: ! 1057: /* the string was already proved to be valid UTF-8 */ ! 1058: exoptions |= PCRE_NO_UTF8_CHECK; ! 1059: ! 1060: /* Check for too many substrings condition. */ ! 1061: if (count == 0) { ! 1062: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); ! 1063: count = size_offsets/3; ! 1064: } ! 1065: ! 1066: piece = subject + start_offset; ! 1067: ! 1068: if (count > 0 && (limit == -1 || limit > 0)) { ! 1069: if (replace_count) { ! 1070: ++*replace_count; ! 1071: } ! 1072: /* Set the match location in subject */ ! 1073: match = subject + offsets[0]; ! 1074: ! 1075: new_len = *result_len + offsets[0] - start_offset; /* part before the match */ ! 1076: ! 1077: /* If evaluating, do it and add the return string's length */ ! 1078: if (eval) { ! 1079: eval_result_len = preg_do_eval(replace, replace_len, subject, ! 1080: offsets, count, &eval_result TSRMLS_CC); ! 1081: new_len += eval_result_len; ! 1082: } else if (is_callable_replace) { ! 1083: /* Use custom function to get replacement string and its length. */ ! 1084: eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); ! 1085: new_len += eval_result_len; ! 1086: } else { /* do regular substitution */ ! 1087: walk = replace; ! 1088: walk_last = 0; ! 1089: while (walk < replace_end) { ! 1090: if ('\\' == *walk || '$' == *walk) { ! 1091: if (walk_last == '\\') { ! 1092: walk++; ! 1093: walk_last = 0; ! 1094: continue; ! 1095: } ! 1096: if (preg_get_backref(&walk, &backref)) { ! 1097: if (backref < count) ! 1098: new_len += offsets[(backref<<1)+1] - offsets[backref<<1]; ! 1099: continue; ! 1100: } ! 1101: } ! 1102: new_len++; ! 1103: walk++; ! 1104: walk_last = walk[-1]; ! 1105: } ! 1106: } ! 1107: ! 1108: if (new_len + 1 > alloc_len) { ! 1109: alloc_len = 1 + alloc_len + 2 * new_len; ! 1110: new_buf = emalloc(alloc_len); ! 1111: memcpy(new_buf, result, *result_len); ! 1112: efree(result); ! 1113: result = new_buf; ! 1114: } ! 1115: /* copy the part of the string before the match */ ! 1116: memcpy(&result[*result_len], piece, match-piece); ! 1117: *result_len += match-piece; ! 1118: ! 1119: /* copy replacement and backrefs */ ! 1120: walkbuf = result + *result_len; ! 1121: ! 1122: /* If evaluating or using custom function, copy result to the buffer ! 1123: * and clean up. */ ! 1124: if (eval || is_callable_replace) { ! 1125: memcpy(walkbuf, eval_result, eval_result_len); ! 1126: *result_len += eval_result_len; ! 1127: STR_FREE(eval_result); ! 1128: } else { /* do regular backreference copying */ ! 1129: walk = replace; ! 1130: walk_last = 0; ! 1131: while (walk < replace_end) { ! 1132: if ('\\' == *walk || '$' == *walk) { ! 1133: if (walk_last == '\\') { ! 1134: *(walkbuf-1) = *walk++; ! 1135: walk_last = 0; ! 1136: continue; ! 1137: } ! 1138: if (preg_get_backref(&walk, &backref)) { ! 1139: if (backref < count) { ! 1140: match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; ! 1141: memcpy(walkbuf, subject + offsets[backref<<1], match_len); ! 1142: walkbuf += match_len; ! 1143: } ! 1144: continue; ! 1145: } ! 1146: } ! 1147: *walkbuf++ = *walk++; ! 1148: walk_last = walk[-1]; ! 1149: } ! 1150: *walkbuf = '\0'; ! 1151: /* increment the result length by how much we've added to the string */ ! 1152: *result_len += walkbuf - (result + *result_len); ! 1153: } ! 1154: ! 1155: if (limit != -1) ! 1156: limit--; ! 1157: ! 1158: } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { ! 1159: /* If we previously set PCRE_NOTEMPTY after a null match, ! 1160: this is not necessarily the end. We need to advance ! 1161: the start offset, and continue. Fudge the offset values ! 1162: to achieve this, unless we're already at the end of the string. */ ! 1163: if (g_notempty != 0 && start_offset < subject_len) { ! 1164: offsets[0] = start_offset; ! 1165: offsets[1] = start_offset + 1; ! 1166: memcpy(&result[*result_len], piece, 1); ! 1167: (*result_len)++; ! 1168: } else { ! 1169: new_len = *result_len + subject_len - start_offset; ! 1170: if (new_len + 1 > alloc_len) { ! 1171: alloc_len = new_len + 1; /* now we know exactly how long it is */ ! 1172: new_buf = safe_emalloc(alloc_len, sizeof(char), 0); ! 1173: memcpy(new_buf, result, *result_len); ! 1174: efree(result); ! 1175: result = new_buf; ! 1176: } ! 1177: /* stick that last bit of string on our output */ ! 1178: memcpy(&result[*result_len], piece, subject_len - start_offset); ! 1179: *result_len += subject_len - start_offset; ! 1180: result[*result_len] = '\0'; ! 1181: break; ! 1182: } ! 1183: } else { ! 1184: pcre_handle_exec_error(count TSRMLS_CC); ! 1185: efree(result); ! 1186: result = NULL; ! 1187: break; ! 1188: } ! 1189: ! 1190: /* If we have matched an empty string, mimic what Perl's /g options does. ! 1191: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try ! 1192: the match again at the same point. If this fails (picked up above) we ! 1193: advance to the next character. */ ! 1194: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; ! 1195: ! 1196: /* Advance to the next piece. */ ! 1197: start_offset = offsets[1]; ! 1198: } ! 1199: ! 1200: efree(offsets); ! 1201: efree(subpat_names); ! 1202: ! 1203: return result; ! 1204: } ! 1205: /* }}} */ ! 1206: ! 1207: /* {{{ php_replace_in_subject ! 1208: */ ! 1209: static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC) ! 1210: { ! 1211: zval **regex_entry, ! 1212: **replace_entry = NULL, ! 1213: *replace_value, ! 1214: empty_replace; ! 1215: char *subject_value, ! 1216: *result; ! 1217: int subject_len; ! 1218: ! 1219: /* Make sure we're dealing with strings. */ ! 1220: convert_to_string_ex(subject); ! 1221: /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */ ! 1222: ZVAL_STRINGL(&empty_replace, "", 0, 0); ! 1223: ! 1224: /* If regex is an array */ ! 1225: if (Z_TYPE_P(regex) == IS_ARRAY) { ! 1226: /* Duplicate subject string for repeated replacement */ ! 1227: subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject)); ! 1228: subject_len = Z_STRLEN_PP(subject); ! 1229: *result_len = subject_len; ! 1230: ! 1231: zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex)); ! 1232: ! 1233: replace_value = replace; ! 1234: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) ! 1235: zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace)); ! 1236: ! 1237: /* For each entry in the regex array, get the entry */ ! 1238: while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) { ! 1239: /* Make sure we're dealing with strings. */ ! 1240: convert_to_string_ex(regex_entry); ! 1241: ! 1242: /* If replace is an array and not a callable construct */ ! 1243: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) { ! 1244: /* Get current entry */ ! 1245: if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) { ! 1246: if (!is_callable_replace) { ! 1247: convert_to_string_ex(replace_entry); ! 1248: } ! 1249: replace_value = *replace_entry; ! 1250: zend_hash_move_forward(Z_ARRVAL_P(replace)); ! 1251: } else { ! 1252: /* We've run out of replacement strings, so use an empty one */ ! 1253: replace_value = &empty_replace; ! 1254: } ! 1255: } ! 1256: ! 1257: /* Do the actual replacement and put the result back into subject_value ! 1258: for further replacements. */ ! 1259: if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry), ! 1260: Z_STRLEN_PP(regex_entry), ! 1261: subject_value, ! 1262: subject_len, ! 1263: replace_value, ! 1264: is_callable_replace, ! 1265: result_len, ! 1266: limit, ! 1267: replace_count TSRMLS_CC)) != NULL) { ! 1268: efree(subject_value); ! 1269: subject_value = result; ! 1270: subject_len = *result_len; ! 1271: } else { ! 1272: efree(subject_value); ! 1273: return NULL; ! 1274: } ! 1275: ! 1276: zend_hash_move_forward(Z_ARRVAL_P(regex)); ! 1277: } ! 1278: ! 1279: return subject_value; ! 1280: } else { ! 1281: result = php_pcre_replace(Z_STRVAL_P(regex), ! 1282: Z_STRLEN_P(regex), ! 1283: Z_STRVAL_PP(subject), ! 1284: Z_STRLEN_PP(subject), ! 1285: replace, ! 1286: is_callable_replace, ! 1287: result_len, ! 1288: limit, ! 1289: replace_count TSRMLS_CC); ! 1290: return result; ! 1291: } ! 1292: } ! 1293: /* }}} */ ! 1294: ! 1295: /* {{{ preg_replace_impl ! 1296: */ ! 1297: static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter) ! 1298: { ! 1299: zval **regex, ! 1300: **replace, ! 1301: **subject, ! 1302: **subject_entry, ! 1303: **zcount = NULL; ! 1304: char *result; ! 1305: int result_len; ! 1306: int limit_val = -1; ! 1307: long limit = -1; ! 1308: char *string_key; ! 1309: ulong num_key; ! 1310: char *callback_name; ! 1311: int replace_count=0, old_replace_count; ! 1312: ! 1313: /* Get function parameters and do error-checking. */ ! 1314: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) { ! 1315: return; ! 1316: } ! 1317: ! 1318: if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) { ! 1319: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); ! 1320: RETURN_FALSE; ! 1321: } ! 1322: ! 1323: SEPARATE_ZVAL(replace); ! 1324: if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) { ! 1325: convert_to_string_ex(replace); ! 1326: } ! 1327: if (is_callable_replace) { ! 1328: if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) { ! 1329: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name); ! 1330: efree(callback_name); ! 1331: MAKE_COPY_ZVAL(subject, return_value); ! 1332: return; ! 1333: } ! 1334: efree(callback_name); ! 1335: } ! 1336: ! 1337: SEPARATE_ZVAL(regex); ! 1338: SEPARATE_ZVAL(subject); ! 1339: ! 1340: if (ZEND_NUM_ARGS() > 3) { ! 1341: limit_val = limit; ! 1342: } ! 1343: ! 1344: if (Z_TYPE_PP(regex) != IS_ARRAY) ! 1345: convert_to_string_ex(regex); ! 1346: ! 1347: /* if subject is an array */ ! 1348: if (Z_TYPE_PP(subject) == IS_ARRAY) { ! 1349: array_init(return_value); ! 1350: zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject)); ! 1351: ! 1352: /* For each subject entry, convert it to string, then perform replacement ! 1353: and add the result to the return_value array. */ ! 1354: while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) { ! 1355: SEPARATE_ZVAL(subject_entry); ! 1356: old_replace_count = replace_count; ! 1357: if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { ! 1358: if (!is_filter || replace_count > old_replace_count) { ! 1359: /* Add to return array */ ! 1360: switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0)) ! 1361: { ! 1362: case HASH_KEY_IS_STRING: ! 1363: add_assoc_stringl(return_value, string_key, result, result_len, 0); ! 1364: break; ! 1365: ! 1366: case HASH_KEY_IS_LONG: ! 1367: add_index_stringl(return_value, num_key, result, result_len, 0); ! 1368: break; ! 1369: } ! 1370: } else { ! 1371: efree(result); ! 1372: } ! 1373: } ! 1374: ! 1375: zend_hash_move_forward(Z_ARRVAL_PP(subject)); ! 1376: } ! 1377: } else { /* if subject is not an array */ ! 1378: old_replace_count = replace_count; ! 1379: if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { ! 1380: if (!is_filter || replace_count > old_replace_count) { ! 1381: RETVAL_STRINGL(result, result_len, 0); ! 1382: } else { ! 1383: efree(result); ! 1384: } ! 1385: } ! 1386: } ! 1387: if (ZEND_NUM_ARGS() > 4) { ! 1388: zval_dtor(*zcount); ! 1389: ZVAL_LONG(*zcount, replace_count); ! 1390: } ! 1391: ! 1392: } ! 1393: /* }}} */ ! 1394: ! 1395: /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) ! 1396: Perform Perl-style regular expression replacement. */ ! 1397: static PHP_FUNCTION(preg_replace) ! 1398: { ! 1399: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); ! 1400: } ! 1401: /* }}} */ ! 1402: ! 1403: /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]]) ! 1404: Perform Perl-style regular expression replacement using replacement callback. */ ! 1405: static PHP_FUNCTION(preg_replace_callback) ! 1406: { ! 1407: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0); ! 1408: } ! 1409: /* }}} */ ! 1410: ! 1411: /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) ! 1412: Perform Perl-style regular expression replacement and only return matches. */ ! 1413: static PHP_FUNCTION(preg_filter) ! 1414: { ! 1415: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); ! 1416: } ! 1417: /* }}} */ ! 1418: ! 1419: /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) ! 1420: Split string into an array using a perl-style regular expression as a delimiter */ ! 1421: static PHP_FUNCTION(preg_split) ! 1422: { ! 1423: char *regex; /* Regular expression */ ! 1424: char *subject; /* String to match against */ ! 1425: int regex_len; ! 1426: int subject_len; ! 1427: long limit_val = -1;/* Integer value of limit */ ! 1428: long flags = 0; /* Match control flags */ ! 1429: pcre_cache_entry *pce; /* Compiled regular expression */ ! 1430: ! 1431: /* Get function parameters and do error checking */ ! 1432: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len, ! 1433: &subject, &subject_len, &limit_val, &flags) == FAILURE) { ! 1434: RETURN_FALSE; ! 1435: } ! 1436: ! 1437: /* Compile regex or get it from cache. */ ! 1438: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { ! 1439: RETURN_FALSE; ! 1440: } ! 1441: ! 1442: php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC); ! 1443: } ! 1444: /* }}} */ ! 1445: ! 1446: /* {{{ php_pcre_split ! 1447: */ ! 1448: PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, ! 1449: long limit_val, long flags TSRMLS_DC) ! 1450: { ! 1451: pcre_extra *extra = NULL; /* Holds results of studying */ ! 1452: pcre *re_bump = NULL; /* Regex instance for empty matches */ ! 1453: pcre_extra *extra_bump = NULL; /* Almost dummy */ ! 1454: pcre_extra extra_data; /* Used locally for exec options */ ! 1455: int *offsets; /* Array of subpattern offsets */ ! 1456: int size_offsets; /* Size of the offsets array */ ! 1457: int exoptions = 0; /* Execution options */ ! 1458: int count = 0; /* Count of matched subpatterns */ ! 1459: int start_offset; /* Where the new search starts */ ! 1460: int next_offset; /* End of the last delimiter match + 1 */ ! 1461: int g_notempty = 0; /* If the match should not be empty */ ! 1462: char *last_match; /* Location of last match */ ! 1463: int rc; ! 1464: int no_empty; /* If NO_EMPTY flag is set */ ! 1465: int delim_capture; /* If delimiters should be captured */ ! 1466: int offset_capture; /* If offsets should be captured */ ! 1467: ! 1468: no_empty = flags & PREG_SPLIT_NO_EMPTY; ! 1469: delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; ! 1470: offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE; ! 1471: ! 1472: if (limit_val == 0) { ! 1473: limit_val = -1; ! 1474: } ! 1475: ! 1476: if (extra == NULL) { ! 1477: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; ! 1478: extra = &extra_data; ! 1479: } ! 1480: extra->match_limit = PCRE_G(backtrack_limit); ! 1481: extra->match_limit_recursion = PCRE_G(recursion_limit); ! 1482: ! 1483: /* Initialize return value */ ! 1484: array_init(return_value); ! 1485: ! 1486: /* Calculate the size of the offsets array, and allocate memory for it. */ ! 1487: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); ! 1488: if (rc < 0) { ! 1489: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); ! 1490: RETURN_FALSE; ! 1491: } ! 1492: size_offsets = (size_offsets + 1) * 3; ! 1493: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); ! 1494: ! 1495: /* Start at the beginning of the string */ ! 1496: start_offset = 0; ! 1497: next_offset = 0; ! 1498: last_match = subject; ! 1499: PCRE_G(error_code) = PHP_PCRE_NO_ERROR; ! 1500: ! 1501: /* Get next piece if no limit or limit not yet reached and something matched*/ ! 1502: while ((limit_val == -1 || limit_val > 1)) { ! 1503: count = pcre_exec(pce->re, extra, subject, ! 1504: subject_len, start_offset, ! 1505: exoptions|g_notempty, offsets, size_offsets); ! 1506: ! 1507: /* the string was already proved to be valid UTF-8 */ ! 1508: exoptions |= PCRE_NO_UTF8_CHECK; ! 1509: ! 1510: /* Check for too many substrings condition. */ ! 1511: if (count == 0) { ! 1512: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); ! 1513: count = size_offsets/3; ! 1514: } ! 1515: ! 1516: /* If something matched */ ! 1517: if (count > 0) { ! 1518: if (!no_empty || &subject[offsets[0]] != last_match) { ! 1519: ! 1520: if (offset_capture) { ! 1521: /* Add (match, offset) pair to the return value */ ! 1522: add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL); ! 1523: } else { ! 1524: /* Add the piece to the return value */ ! 1525: add_next_index_stringl(return_value, last_match, ! 1526: &subject[offsets[0]]-last_match, 1); ! 1527: } ! 1528: ! 1529: /* One less left to do */ ! 1530: if (limit_val != -1) ! 1531: limit_val--; ! 1532: } ! 1533: ! 1534: last_match = &subject[offsets[1]]; ! 1535: next_offset = offsets[1]; ! 1536: ! 1537: if (delim_capture) { ! 1538: int i, match_len; ! 1539: for (i = 1; i < count; i++) { ! 1540: match_len = offsets[(i<<1)+1] - offsets[i<<1]; ! 1541: /* If we have matched a delimiter */ ! 1542: if (!no_empty || match_len > 0) { ! 1543: if (offset_capture) { ! 1544: add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL); ! 1545: } else { ! 1546: add_next_index_stringl(return_value, ! 1547: &subject[offsets[i<<1]], ! 1548: match_len, 1); ! 1549: } ! 1550: } ! 1551: } ! 1552: } ! 1553: } else if (count == PCRE_ERROR_NOMATCH) { ! 1554: /* If we previously set PCRE_NOTEMPTY after a null match, ! 1555: this is not necessarily the end. We need to advance ! 1556: the start offset, and continue. Fudge the offset values ! 1557: to achieve this, unless we're already at the end of the string. */ ! 1558: if (g_notempty != 0 && start_offset < subject_len) { ! 1559: if (pce->compile_options & PCRE_UTF8) { ! 1560: if (re_bump == NULL) { ! 1561: int dummy; ! 1562: ! 1563: if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) { ! 1564: RETURN_FALSE; ! 1565: } ! 1566: } ! 1567: count = pcre_exec(re_bump, extra_bump, subject, ! 1568: subject_len, start_offset, ! 1569: exoptions, offsets, size_offsets); ! 1570: if (count < 1) { ! 1571: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); ! 1572: RETURN_FALSE; ! 1573: } ! 1574: } else { ! 1575: offsets[0] = start_offset; ! 1576: offsets[1] = start_offset + 1; ! 1577: } ! 1578: } else ! 1579: break; ! 1580: } else { ! 1581: pcre_handle_exec_error(count TSRMLS_CC); ! 1582: break; ! 1583: } ! 1584: ! 1585: /* If we have matched an empty string, mimic what Perl's /g options does. ! 1586: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try ! 1587: the match again at the same point. If this fails (picked up above) we ! 1588: advance to the next character. */ ! 1589: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; ! 1590: ! 1591: /* Advance to the position right after the last full match */ ! 1592: start_offset = offsets[1]; ! 1593: } ! 1594: ! 1595: ! 1596: start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */ ! 1597: ! 1598: if (!no_empty || start_offset < subject_len) ! 1599: { ! 1600: if (offset_capture) { ! 1601: /* Add the last (match, offset) pair to the return value */ ! 1602: add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL); ! 1603: } else { ! 1604: /* Add the last piece to the return value */ ! 1605: add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1); ! 1606: } ! 1607: } ! 1608: ! 1609: ! 1610: /* Clean up */ ! 1611: efree(offsets); ! 1612: } ! 1613: /* }}} */ ! 1614: ! 1615: /* {{{ proto string preg_quote(string str [, string delim_char]) ! 1616: Quote regular expression characters plus an optional character */ ! 1617: static PHP_FUNCTION(preg_quote) ! 1618: { ! 1619: int in_str_len; ! 1620: char *in_str; /* Input string argument */ ! 1621: char *in_str_end; /* End of the input string */ ! 1622: int delim_len = 0; ! 1623: char *delim = NULL; /* Additional delimiter argument */ ! 1624: char *out_str, /* Output string with quoted characters */ ! 1625: *p, /* Iterator for input string */ ! 1626: *q, /* Iterator for output string */ ! 1627: delim_char=0, /* Delimiter character to be quoted */ ! 1628: c; /* Current character */ ! 1629: zend_bool quote_delim = 0; /* Whether to quote additional delim char */ ! 1630: ! 1631: /* Get the arguments and check for errors */ ! 1632: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len, ! 1633: &delim, &delim_len) == FAILURE) { ! 1634: return; ! 1635: } ! 1636: ! 1637: in_str_end = in_str + in_str_len; ! 1638: ! 1639: /* Nothing to do if we got an empty string */ ! 1640: if (in_str == in_str_end) { ! 1641: RETURN_EMPTY_STRING(); ! 1642: } ! 1643: ! 1644: if (delim && *delim) { ! 1645: delim_char = delim[0]; ! 1646: quote_delim = 1; ! 1647: } ! 1648: ! 1649: /* Allocate enough memory so that even if each character ! 1650: is quoted, we won't run out of room */ ! 1651: out_str = safe_emalloc(4, in_str_len, 1); ! 1652: ! 1653: /* Go through the string and quote necessary characters */ ! 1654: for(p = in_str, q = out_str; p != in_str_end; p++) { ! 1655: c = *p; ! 1656: switch(c) { ! 1657: case '.': ! 1658: case '\\': ! 1659: case '+': ! 1660: case '*': ! 1661: case '?': ! 1662: case '[': ! 1663: case '^': ! 1664: case ']': ! 1665: case '$': ! 1666: case '(': ! 1667: case ')': ! 1668: case '{': ! 1669: case '}': ! 1670: case '=': ! 1671: case '!': ! 1672: case '>': ! 1673: case '<': ! 1674: case '|': ! 1675: case ':': ! 1676: case '-': ! 1677: *q++ = '\\'; ! 1678: *q++ = c; ! 1679: break; ! 1680: ! 1681: case '\0': ! 1682: *q++ = '\\'; ! 1683: *q++ = '0'; ! 1684: *q++ = '0'; ! 1685: *q++ = '0'; ! 1686: break; ! 1687: ! 1688: default: ! 1689: if (quote_delim && c == delim_char) ! 1690: *q++ = '\\'; ! 1691: *q++ = c; ! 1692: break; ! 1693: } ! 1694: } ! 1695: *q = '\0'; ! 1696: ! 1697: /* Reallocate string and return it */ ! 1698: RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0); ! 1699: } ! 1700: /* }}} */ ! 1701: ! 1702: /* {{{ proto array preg_grep(string regex, array input [, int flags]) ! 1703: Searches array and returns entries which match regex */ ! 1704: static PHP_FUNCTION(preg_grep) ! 1705: { ! 1706: char *regex; /* Regular expression */ ! 1707: int regex_len; ! 1708: zval *input; /* Input array */ ! 1709: long flags = 0; /* Match control flags */ ! 1710: pcre_cache_entry *pce; /* Compiled regular expression */ ! 1711: ! 1712: /* Get arguments and do error checking */ ! 1713: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len, ! 1714: &input, &flags) == FAILURE) { ! 1715: return; ! 1716: } ! 1717: ! 1718: /* Compile regex or get it from cache. */ ! 1719: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { ! 1720: RETURN_FALSE; ! 1721: } ! 1722: ! 1723: php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC); ! 1724: } ! 1725: /* }}} */ ! 1726: ! 1727: PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */ ! 1728: { ! 1729: zval **entry; /* An entry in the input array */ ! 1730: pcre_extra *extra = pce->extra;/* Holds results of studying */ ! 1731: pcre_extra extra_data; /* Used locally for exec options */ ! 1732: int *offsets; /* Array of subpattern offsets */ ! 1733: int size_offsets; /* Size of the offsets array */ ! 1734: int count = 0; /* Count of matched subpatterns */ ! 1735: char *string_key; ! 1736: ulong num_key; ! 1737: zend_bool invert; /* Whether to return non-matching ! 1738: entries */ ! 1739: int rc; ! 1740: ! 1741: invert = flags & PREG_GREP_INVERT ? 1 : 0; ! 1742: ! 1743: if (extra == NULL) { ! 1744: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; ! 1745: extra = &extra_data; ! 1746: } ! 1747: extra->match_limit = PCRE_G(backtrack_limit); ! 1748: extra->match_limit_recursion = PCRE_G(recursion_limit); ! 1749: ! 1750: /* Calculate the size of the offsets array, and allocate memory for it. */ ! 1751: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); ! 1752: if (rc < 0) { ! 1753: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); ! 1754: RETURN_FALSE; ! 1755: } ! 1756: size_offsets = (size_offsets + 1) * 3; ! 1757: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); ! 1758: ! 1759: /* Initialize return array */ ! 1760: array_init(return_value); ! 1761: ! 1762: PCRE_G(error_code) = PHP_PCRE_NO_ERROR; ! 1763: ! 1764: /* Go through the input array */ ! 1765: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); ! 1766: while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { ! 1767: zval subject = **entry; ! 1768: ! 1769: if (Z_TYPE_PP(entry) != IS_STRING) { ! 1770: zval_copy_ctor(&subject); ! 1771: convert_to_string(&subject); ! 1772: } ! 1773: ! 1774: /* Perform the match */ ! 1775: count = pcre_exec(pce->re, extra, Z_STRVAL(subject), ! 1776: Z_STRLEN(subject), 0, ! 1777: 0, offsets, size_offsets); ! 1778: ! 1779: /* Check for too many substrings condition. */ ! 1780: if (count == 0) { ! 1781: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); ! 1782: count = size_offsets/3; ! 1783: } else if (count < 0 && count != PCRE_ERROR_NOMATCH) { ! 1784: pcre_handle_exec_error(count TSRMLS_CC); ! 1785: break; ! 1786: } ! 1787: ! 1788: /* If the entry fits our requirements */ ! 1789: if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) { ! 1790: ! 1791: Z_ADDREF_PP(entry); ! 1792: ! 1793: /* Add to return array */ ! 1794: switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) ! 1795: { ! 1796: case HASH_KEY_IS_STRING: ! 1797: zend_hash_update(Z_ARRVAL_P(return_value), string_key, ! 1798: strlen(string_key)+1, entry, sizeof(zval *), NULL); ! 1799: break; ! 1800: ! 1801: case HASH_KEY_IS_LONG: ! 1802: zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry, ! 1803: sizeof(zval *), NULL); ! 1804: break; ! 1805: } ! 1806: } ! 1807: ! 1808: if (Z_TYPE_PP(entry) != IS_STRING) { ! 1809: zval_dtor(&subject); ! 1810: } ! 1811: ! 1812: zend_hash_move_forward(Z_ARRVAL_P(input)); ! 1813: } ! 1814: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); ! 1815: /* Clean up */ ! 1816: efree(offsets); ! 1817: } ! 1818: /* }}} */ ! 1819: ! 1820: /* {{{ proto int preg_last_error() ! 1821: Returns the error code of the last regexp execution. */ ! 1822: static PHP_FUNCTION(preg_last_error) ! 1823: { ! 1824: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) { ! 1825: return; ! 1826: } ! 1827: ! 1828: RETURN_LONG(PCRE_G(error_code)); ! 1829: } ! 1830: /* }}} */ ! 1831: ! 1832: /* {{{ module definition structures */ ! 1833: ! 1834: /* {{{ arginfo */ ! 1835: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2) ! 1836: ZEND_ARG_INFO(0, pattern) ! 1837: ZEND_ARG_INFO(0, subject) ! 1838: ZEND_ARG_INFO(1, subpatterns) /* array */ ! 1839: ZEND_ARG_INFO(0, flags) ! 1840: ZEND_ARG_INFO(0, offset) ! 1841: ZEND_END_ARG_INFO() ! 1842: ! 1843: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3) ! 1844: ZEND_ARG_INFO(0, pattern) ! 1845: ZEND_ARG_INFO(0, subject) ! 1846: ZEND_ARG_INFO(1, subpatterns) /* array */ ! 1847: ZEND_ARG_INFO(0, flags) ! 1848: ZEND_ARG_INFO(0, offset) ! 1849: ZEND_END_ARG_INFO() ! 1850: ! 1851: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3) ! 1852: ZEND_ARG_INFO(0, regex) ! 1853: ZEND_ARG_INFO(0, replace) ! 1854: ZEND_ARG_INFO(0, subject) ! 1855: ZEND_ARG_INFO(0, limit) ! 1856: ZEND_ARG_INFO(1, count) ! 1857: ZEND_END_ARG_INFO() ! 1858: ! 1859: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3) ! 1860: ZEND_ARG_INFO(0, regex) ! 1861: ZEND_ARG_INFO(0, callback) ! 1862: ZEND_ARG_INFO(0, subject) ! 1863: ZEND_ARG_INFO(0, limit) ! 1864: ZEND_ARG_INFO(1, count) ! 1865: ZEND_END_ARG_INFO() ! 1866: ! 1867: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2) ! 1868: ZEND_ARG_INFO(0, pattern) ! 1869: ZEND_ARG_INFO(0, subject) ! 1870: ZEND_ARG_INFO(0, limit) ! 1871: ZEND_ARG_INFO(0, flags) ! 1872: ZEND_END_ARG_INFO() ! 1873: ! 1874: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1) ! 1875: ZEND_ARG_INFO(0, str) ! 1876: ZEND_ARG_INFO(0, delim_char) ! 1877: ZEND_END_ARG_INFO() ! 1878: ! 1879: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2) ! 1880: ZEND_ARG_INFO(0, regex) ! 1881: ZEND_ARG_INFO(0, input) /* array */ ! 1882: ZEND_ARG_INFO(0, flags) ! 1883: ZEND_END_ARG_INFO() ! 1884: ! 1885: ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0) ! 1886: ZEND_END_ARG_INFO() ! 1887: /* }}} */ ! 1888: ! 1889: static const zend_function_entry pcre_functions[] = { ! 1890: PHP_FE(preg_match, arginfo_preg_match) ! 1891: PHP_FE(preg_match_all, arginfo_preg_match_all) ! 1892: PHP_FE(preg_replace, arginfo_preg_replace) ! 1893: PHP_FE(preg_replace_callback, arginfo_preg_replace_callback) ! 1894: PHP_FE(preg_filter, arginfo_preg_replace) ! 1895: PHP_FE(preg_split, arginfo_preg_split) ! 1896: PHP_FE(preg_quote, arginfo_preg_quote) ! 1897: PHP_FE(preg_grep, arginfo_preg_grep) ! 1898: PHP_FE(preg_last_error, arginfo_preg_last_error) ! 1899: PHP_FE_END ! 1900: }; ! 1901: ! 1902: zend_module_entry pcre_module_entry = { ! 1903: STANDARD_MODULE_HEADER, ! 1904: "pcre", ! 1905: pcre_functions, ! 1906: PHP_MINIT(pcre), ! 1907: PHP_MSHUTDOWN(pcre), ! 1908: NULL, ! 1909: NULL, ! 1910: PHP_MINFO(pcre), ! 1911: NO_VERSION_YET, ! 1912: PHP_MODULE_GLOBALS(pcre), ! 1913: PHP_GINIT(pcre), ! 1914: PHP_GSHUTDOWN(pcre), ! 1915: NULL, ! 1916: STANDARD_MODULE_PROPERTIES_EX ! 1917: }; ! 1918: ! 1919: #ifdef COMPILE_DL_PCRE ! 1920: ZEND_GET_MODULE(pcre) ! 1921: #endif ! 1922: ! 1923: /* }}} */ ! 1924: ! 1925: #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */ ! 1926: ! 1927: /* ! 1928: * Local variables: ! 1929: * tab-width: 4 ! 1930: * c-basic-offset: 4 ! 1931: * End: ! 1932: * vim600: sw=4 ts=4 fdm=marker ! 1933: * vim<600: sw=4 ts=4 ! 1934: */