Annotation of embedaddon/php/ext/pcre/php_pcre.c, revision 1.1
1.1 ! misho 1: /*
! 2: +----------------------------------------------------------------------+
! 3: | PHP Version 5 |
! 4: +----------------------------------------------------------------------+
! 5: | Copyright (c) 1997-2012 The PHP Group |
! 6: +----------------------------------------------------------------------+
! 7: | This source file is subject to version 3.01 of the PHP license, |
! 8: | that is bundled with this package in the file LICENSE, and is |
! 9: | available through the world-wide-web at the following url: |
! 10: | http://www.php.net/license/3_01.txt |
! 11: | If you did not receive a copy of the PHP license and are unable to |
! 12: | obtain it through the world-wide-web, please send a note to |
! 13: | license@php.net so we can mail you a copy immediately. |
! 14: +----------------------------------------------------------------------+
! 15: | Author: Andrei Zmievski <andrei@php.net> |
! 16: +----------------------------------------------------------------------+
! 17: */
! 18:
! 19: /* $Id: php_pcre.c 321634 2012-01-01 13:15:04Z felipe $ */
! 20:
! 21: #include "php.h"
! 22: #include "php_ini.h"
! 23: #include "php_globals.h"
! 24: #include "php_pcre.h"
! 25: #include "ext/standard/info.h"
! 26: #include "ext/standard/php_smart_str.h"
! 27:
! 28: #if HAVE_PCRE || HAVE_BUNDLED_PCRE
! 29:
! 30: #include "ext/standard/php_string.h"
! 31:
/* Result orderings for global matching; php_pcre_match_impl() reads the
   ordering from the low byte (flags & 0xff) of its flags argument. */
#define PREG_PATTERN_ORDER			0x01
#define PREG_SET_ORDER				0x02
/* Bit outside the low byte: each reported match is paired with its offset. */
#define PREG_OFFSET_CAPTURE			0x100

/* PREG_SPLIT_* bit flags (their consumer is outside this part of the file). */
#define PREG_SPLIT_NO_EMPTY			0x01
#define PREG_SPLIT_DELIM_CAPTURE	0x02
#define PREG_SPLIT_OFFSET_CAPTURE	0x04

/* Stored in a cache entry's preg_options when the pattern has the 'e' modifier. */
#define PREG_REPLACE_EVAL			0x01

/* PREG_GREP_* bit flag (its consumer is outside this part of the file). */
#define PREG_GREP_INVERT			0x01

/* Entry count at which the compiled-pattern cache is trimmed before an insert. */
#define PCRE_CACHE_SIZE				4096
! 45:
! 46: enum {
! 47: PHP_PCRE_NO_ERROR = 0,
! 48: PHP_PCRE_INTERNAL_ERROR,
! 49: PHP_PCRE_BACKTRACK_LIMIT_ERROR,
! 50: PHP_PCRE_RECURSION_LIMIT_ERROR,
! 51: PHP_PCRE_BAD_UTF8_ERROR,
! 52: PHP_PCRE_BAD_UTF8_OFFSET_ERROR
! 53: };
! 54:
! 55:
! 56: ZEND_DECLARE_MODULE_GLOBALS(pcre)
! 57:
! 58:
! 59: static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
! 60: {
! 61: int preg_code = 0;
! 62:
! 63: switch (pcre_code) {
! 64: case PCRE_ERROR_MATCHLIMIT:
! 65: preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
! 66: break;
! 67:
! 68: case PCRE_ERROR_RECURSIONLIMIT:
! 69: preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
! 70: break;
! 71:
! 72: case PCRE_ERROR_BADUTF8:
! 73: preg_code = PHP_PCRE_BAD_UTF8_ERROR;
! 74: break;
! 75:
! 76: case PCRE_ERROR_BADUTF8_OFFSET:
! 77: preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
! 78: break;
! 79:
! 80: default:
! 81: preg_code = PHP_PCRE_INTERNAL_ERROR;
! 82: break;
! 83: }
! 84:
! 85: PCRE_G(error_code) = preg_code;
! 86: }
! 87: /* }}} */
! 88:
! 89: static void php_free_pcre_cache(void *data) /* {{{ */
! 90: {
! 91: pcre_cache_entry *pce = (pcre_cache_entry *) data;
! 92: if (!pce) return;
! 93: pefree(pce->re, 1);
! 94: if (pce->extra) pefree(pce->extra, 1);
! 95: #if HAVE_SETLOCALE
! 96: if ((void*)pce->tables) pefree((void*)pce->tables, 1);
! 97: pefree(pce->locale, 1);
! 98: #endif
! 99: }
! 100: /* }}} */
! 101:
! 102: static PHP_GINIT_FUNCTION(pcre) /* {{{ */
! 103: {
! 104: zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
! 105: pcre_globals->backtrack_limit = 0;
! 106: pcre_globals->recursion_limit = 0;
! 107: pcre_globals->error_code = PHP_PCRE_NO_ERROR;
! 108: }
! 109: /* }}} */
! 110:
! 111: static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
! 112: {
! 113: zend_hash_destroy(&pcre_globals->pcre_cache);
! 114: }
! 115: /* }}} */
! 116:
! 117: PHP_INI_BEGIN()
! 118: STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
! 119: STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
! 120: PHP_INI_END()
! 121:
! 122:
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emit the module's info section: a two-row table (support status and the
 * version string reported by the PCRE library) followed by the INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	php_info_print_table_start();
	php_info_print_table_row(2,
		"PCRE (Perl Compatible Regular Expressions) Support",
		"enabled");
	php_info_print_table_row(2,
		"PCRE Library Version",
		pcre_version());
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
! 134:
! 135: /* {{{ PHP_MINIT_FUNCTION(pcre) */
! 136: static PHP_MINIT_FUNCTION(pcre)
! 137: {
! 138: REGISTER_INI_ENTRIES();
! 139:
! 140: REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
! 141: REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
! 142: REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
! 143: REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
! 144: REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
! 145: REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
! 146: REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
! 147:
! 148: REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
! 149: REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
! 150: REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
! 151: REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
! 152: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
! 153: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
! 154: REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
! 155:
! 156: return SUCCESS;
! 157: }
! 158: /* }}} */
! 159:
! 160: /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
! 161: static PHP_MSHUTDOWN_FUNCTION(pcre)
! 162: {
! 163: UNREGISTER_INI_ENTRIES();
! 164:
! 165: return SUCCESS;
! 166: }
! 167: /* }}} */
! 168:
! 169: /* {{{ static pcre_clean_cache */
! 170: static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
! 171: {
! 172: int *num_clean = (int *)arg;
! 173:
! 174: if (*num_clean > 0) {
! 175: (*num_clean)--;
! 176: return 1;
! 177: } else {
! 178: return 0;
! 179: }
! 180: }
! 181: /* }}} */
! 182:
! 183: /* {{{ static make_subpats_table */
! 184: static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
! 185: {
! 186: pcre_extra *extra = pce->extra;
! 187: int name_cnt = 0, name_size, ni = 0;
! 188: int rc;
! 189: char *name_table;
! 190: unsigned short name_idx;
! 191: char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
! 192:
! 193: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
! 194: if (rc < 0) {
! 195: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
! 196: efree(subpat_names);
! 197: return NULL;
! 198: }
! 199: if (name_cnt > 0) {
! 200: int rc1, rc2;
! 201:
! 202: rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
! 203: rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
! 204: rc = rc2 ? rc2 : rc1;
! 205: if (rc < 0) {
! 206: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
! 207: efree(subpat_names);
! 208: return NULL;
! 209: }
! 210:
! 211: while (ni++ < name_cnt) {
! 212: name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
! 213: subpat_names[name_idx] = name_table + 2;
! 214: if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
! 215: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
! 216: efree(subpat_names);
! 217: return NULL;
! 218: }
! 219: name_table += name_size;
! 220: }
! 221: }
! 222:
! 223: return subpat_names;
! 224: }
! 225: /* }}} */
! 226:
! 227: /* {{{ pcre_get_compiled_regex_cache
! 228: */
! 229: PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
! 230: {
! 231: pcre *re = NULL;
! 232: pcre_extra *extra;
! 233: int coptions = 0;
! 234: int soptions = 0;
! 235: const char *error;
! 236: int erroffset;
! 237: char delimiter;
! 238: char start_delimiter;
! 239: char end_delimiter;
! 240: char *p, *pp;
! 241: char *pattern;
! 242: int do_study = 0;
! 243: int poptions = 0;
! 244: int count = 0;
! 245: unsigned const char *tables = NULL;
! 246: #if HAVE_SETLOCALE
! 247: char *locale = setlocale(LC_CTYPE, NULL);
! 248: #endif
! 249: pcre_cache_entry *pce;
! 250: pcre_cache_entry new_entry;
! 251:
! 252: /* Try to lookup the cached regex entry, and if successful, just pass
! 253: back the compiled pattern, otherwise go on and compile it. */
! 254: if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
! 255: /*
! 256: * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
! 257: * is, we flush it and compile the pattern from scratch.
! 258: */
! 259: if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
! 260: zend_hash_clean(&PCRE_G(pcre_cache));
! 261: } else {
! 262: #if HAVE_SETLOCALE
! 263: if (!strcmp(pce->locale, locale)) {
! 264: #endif
! 265: return pce;
! 266: #if HAVE_SETLOCALE
! 267: }
! 268: #endif
! 269: }
! 270: }
! 271:
! 272: p = regex;
! 273:
! 274: /* Parse through the leading whitespace, and display a warning if we
! 275: get to the end without encountering a delimiter. */
! 276: while (isspace((int)*(unsigned char *)p)) p++;
! 277: if (*p == 0) {
! 278: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
! 279: return NULL;
! 280: }
! 281:
! 282: /* Get the delimiter and display a warning if it is alphanumeric
! 283: or a backslash. */
! 284: delimiter = *p++;
! 285: if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
! 286: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
! 287: return NULL;
! 288: }
! 289:
! 290: start_delimiter = delimiter;
! 291: if ((pp = strchr("([{< )]}> )]}>", delimiter)))
! 292: delimiter = pp[5];
! 293: end_delimiter = delimiter;
! 294:
! 295: if (start_delimiter == end_delimiter) {
! 296: /* We need to iterate through the pattern, searching for the ending delimiter,
! 297: but skipping the backslashed delimiters. If the ending delimiter is not
! 298: found, display a warning. */
! 299: pp = p;
! 300: while (*pp != 0) {
! 301: if (*pp == '\\' && pp[1] != 0) pp++;
! 302: else if (*pp == delimiter)
! 303: break;
! 304: pp++;
! 305: }
! 306: if (*pp == 0) {
! 307: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
! 308: return NULL;
! 309: }
! 310: } else {
! 311: /* We iterate through the pattern, searching for the matching ending
! 312: * delimiter. For each matching starting delimiter, we increment nesting
! 313: * level, and decrement it for each matching ending delimiter. If we
! 314: * reach the end of the pattern without matching, display a warning.
! 315: */
! 316: int brackets = 1; /* brackets nesting level */
! 317: pp = p;
! 318: while (*pp != 0) {
! 319: if (*pp == '\\' && pp[1] != 0) pp++;
! 320: else if (*pp == end_delimiter && --brackets <= 0)
! 321: break;
! 322: else if (*pp == start_delimiter)
! 323: brackets++;
! 324: pp++;
! 325: }
! 326: if (*pp == 0) {
! 327: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
! 328: return NULL;
! 329: }
! 330: }
! 331:
! 332: /* Make a copy of the actual pattern. */
! 333: pattern = estrndup(p, pp-p);
! 334:
! 335: /* Move on to the options */
! 336: pp++;
! 337:
! 338: /* Parse through the options, setting appropriate flags. Display
! 339: a warning if we encounter an unknown modifier. */
! 340: while (*pp != 0) {
! 341: switch (*pp++) {
! 342: /* Perl compatible options */
! 343: case 'i': coptions |= PCRE_CASELESS; break;
! 344: case 'm': coptions |= PCRE_MULTILINE; break;
! 345: case 's': coptions |= PCRE_DOTALL; break;
! 346: case 'x': coptions |= PCRE_EXTENDED; break;
! 347:
! 348: /* PCRE specific options */
! 349: case 'A': coptions |= PCRE_ANCHORED; break;
! 350: case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
! 351: case 'S': do_study = 1; break;
! 352: case 'U': coptions |= PCRE_UNGREEDY; break;
! 353: case 'X': coptions |= PCRE_EXTRA; break;
! 354: case 'u': coptions |= PCRE_UTF8;
! 355: /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
! 356: characters, even in UTF-8 mode. However, this can be changed by setting
! 357: the PCRE_UCP option. */
! 358: #ifdef PCRE_UCP
! 359: coptions |= PCRE_UCP;
! 360: #endif
! 361: break;
! 362:
! 363: /* Custom preg options */
! 364: case 'e': poptions |= PREG_REPLACE_EVAL; break;
! 365:
! 366: case ' ':
! 367: case '\n':
! 368: break;
! 369:
! 370: default:
! 371: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
! 372: efree(pattern);
! 373: return NULL;
! 374: }
! 375: }
! 376:
! 377: #if HAVE_SETLOCALE
! 378: if (strcmp(locale, "C"))
! 379: tables = pcre_maketables();
! 380: #endif
! 381:
! 382: /* Compile pattern and display a warning if compilation failed. */
! 383: re = pcre_compile(pattern,
! 384: coptions,
! 385: &error,
! 386: &erroffset,
! 387: tables);
! 388:
! 389: if (re == NULL) {
! 390: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
! 391: efree(pattern);
! 392: if (tables) {
! 393: pefree((void*)tables, 1);
! 394: }
! 395: return NULL;
! 396: }
! 397:
! 398: /* If study option was specified, study the pattern and
! 399: store the result in extra for passing to pcre_exec. */
! 400: if (do_study) {
! 401: extra = pcre_study(re, soptions, &error);
! 402: if (extra) {
! 403: extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
! 404: }
! 405: if (error != NULL) {
! 406: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
! 407: }
! 408: } else {
! 409: extra = NULL;
! 410: }
! 411:
! 412: efree(pattern);
! 413:
! 414: /*
! 415: * If we reached cache limit, clean out the items from the head of the list;
! 416: * these are supposedly the oldest ones (but not necessarily the least used
! 417: * ones).
! 418: */
! 419: if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
! 420: int num_clean = PCRE_CACHE_SIZE / 8;
! 421: zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
! 422: }
! 423:
! 424: /* Store the compiled pattern and extra info in the cache. */
! 425: new_entry.re = re;
! 426: new_entry.extra = extra;
! 427: new_entry.preg_options = poptions;
! 428: new_entry.compile_options = coptions;
! 429: #if HAVE_SETLOCALE
! 430: new_entry.locale = pestrdup(locale, 1);
! 431: new_entry.tables = tables;
! 432: #endif
! 433: zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
! 434: sizeof(pcre_cache_entry), (void**)&pce);
! 435:
! 436: return pce;
! 437: }
! 438: /* }}} */
! 439:
! 440: /* {{{ pcre_get_compiled_regex
! 441: */
! 442: PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
! 443: {
! 444: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
! 445:
! 446: if (extra) {
! 447: *extra = pce ? pce->extra : NULL;
! 448: }
! 449: if (preg_options) {
! 450: *preg_options = pce ? pce->preg_options : 0;
! 451: }
! 452:
! 453: return pce ? pce->re : NULL;
! 454: }
! 455: /* }}} */
! 456:
! 457: /* {{{ pcre_get_compiled_regex_ex
! 458: */
! 459: PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
! 460: {
! 461: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
! 462:
! 463: if (extra) {
! 464: *extra = pce ? pce->extra : NULL;
! 465: }
! 466: if (preg_options) {
! 467: *preg_options = pce ? pce->preg_options : 0;
! 468: }
! 469: if (compile_options) {
! 470: *compile_options = pce ? pce->compile_options : 0;
! 471: }
! 472:
! 473: return pce ? pce->re : NULL;
! 474: }
! 475: /* }}} */
! 476:
! 477: /* {{{ add_offset_pair */
! 478: static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
! 479: {
! 480: zval *match_pair;
! 481:
! 482: ALLOC_ZVAL(match_pair);
! 483: array_init(match_pair);
! 484: INIT_PZVAL(match_pair);
! 485:
! 486: /* Add (match, offset) to the return value */
! 487: add_next_index_stringl(match_pair, str, len, 1);
! 488: add_next_index_long(match_pair, offset);
! 489:
! 490: if (name) {
! 491: zval_add_ref(&match_pair);
! 492: zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
! 493: }
! 494: zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
! 495: }
! 496: /* }}} */
! 497:
! 498: static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
! 499: {
! 500: /* parameters */
! 501: char *regex; /* Regular expression */
! 502: char *subject; /* String to match against */
! 503: int regex_len;
! 504: int subject_len;
! 505: pcre_cache_entry *pce; /* Compiled regular expression */
! 506: zval *subpats = NULL; /* Array for subpatterns */
! 507: long flags = 0; /* Match control flags */
! 508: long start_offset = 0; /* Where the new search starts */
! 509:
! 510: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len,
! 511: &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
! 512: RETURN_FALSE;
! 513: }
! 514:
! 515: /* Compile regex or get it from cache. */
! 516: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
! 517: RETURN_FALSE;
! 518: }
! 519:
! 520: php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
! 521: global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
! 522: }
! 523: /* }}} */
! 524:
! 525: /* {{{ php_pcre_match_impl() */
! 526: PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
! 527: zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
! 528: {
! 529: zval *result_set, /* Holds a set of subpatterns after
! 530: a global match */
! 531: **match_sets = NULL; /* An array of sets of matches for each
! 532: subpattern after a global match */
! 533: pcre_extra *extra = pce->extra;/* Holds results of studying */
! 534: pcre_extra extra_data; /* Used locally for exec options */
! 535: int exoptions = 0; /* Execution options */
! 536: int count = 0; /* Count of matched subpatterns */
! 537: int *offsets; /* Array of subpattern offsets */
! 538: int num_subpats; /* Number of captured subpatterns */
! 539: int size_offsets; /* Size of the offsets array */
! 540: int matched; /* Has anything matched */
! 541: int g_notempty = 0; /* If the match should not be empty */
! 542: const char **stringlist; /* Holds list of subpatterns */
! 543: char **subpat_names; /* Array for named subpatterns */
! 544: int i, rc;
! 545: int subpats_order; /* Order of subpattern matches */
! 546: int offset_capture; /* Capture match offsets: yes/no */
! 547:
! 548: /* Overwrite the passed-in value for subpatterns with an empty array. */
! 549: if (subpats != NULL) {
! 550: zval_dtor(subpats);
! 551: array_init(subpats);
! 552: }
! 553:
! 554: subpats_order = global ? PREG_PATTERN_ORDER : 0;
! 555:
! 556: if (use_flags) {
! 557: offset_capture = flags & PREG_OFFSET_CAPTURE;
! 558:
! 559: /*
! 560: * subpats_order is pre-set to pattern mode so we change it only if
! 561: * necessary.
! 562: */
! 563: if (flags & 0xff) {
! 564: subpats_order = flags & 0xff;
! 565: }
! 566: if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
! 567: (!global && subpats_order != 0)) {
! 568: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
! 569: return;
! 570: }
! 571: } else {
! 572: offset_capture = 0;
! 573: }
! 574:
! 575: /* Negative offset counts from the end of the string. */
! 576: if (start_offset < 0) {
! 577: start_offset = subject_len + start_offset;
! 578: if (start_offset < 0) {
! 579: start_offset = 0;
! 580: }
! 581: }
! 582:
! 583: if (extra == NULL) {
! 584: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
! 585: extra = &extra_data;
! 586: }
! 587: extra->match_limit = PCRE_G(backtrack_limit);
! 588: extra->match_limit_recursion = PCRE_G(recursion_limit);
! 589:
! 590: /* Calculate the size of the offsets array, and allocate memory for it. */
! 591: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
! 592: if (rc < 0) {
! 593: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
! 594: RETURN_FALSE;
! 595: }
! 596: num_subpats++;
! 597: size_offsets = num_subpats * 3;
! 598:
! 599: /*
! 600: * Build a mapping from subpattern numbers to their names. We will always
! 601: * allocate the table, even though there may be no named subpatterns. This
! 602: * avoids somewhat more complicated logic in the inner loops.
! 603: */
! 604: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
! 605: if (!subpat_names) {
! 606: RETURN_FALSE;
! 607: }
! 608:
! 609: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
! 610:
! 611: /* Allocate match sets array and initialize the values. */
! 612: if (global && subpats_order == PREG_PATTERN_ORDER) {
! 613: match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
! 614: for (i=0; i<num_subpats; i++) {
! 615: ALLOC_ZVAL(match_sets[i]);
! 616: array_init(match_sets[i]);
! 617: INIT_PZVAL(match_sets[i]);
! 618: }
! 619: }
! 620:
! 621: matched = 0;
! 622: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
! 623:
! 624: do {
! 625: /* Execute the regular expression. */
! 626: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
! 627: exoptions|g_notempty, offsets, size_offsets);
! 628:
! 629: /* the string was already proved to be valid UTF-8 */
! 630: exoptions |= PCRE_NO_UTF8_CHECK;
! 631:
! 632: /* Check for too many substrings condition. */
! 633: if (count == 0) {
! 634: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
! 635: count = size_offsets/3;
! 636: }
! 637:
! 638: /* If something has matched */
! 639: if (count > 0) {
! 640: matched++;
! 641:
! 642: /* If subpatterns array has been passed, fill it in with values. */
! 643: if (subpats != NULL) {
! 644: /* Try to get the list of substrings and display a warning if failed. */
! 645: if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
! 646: efree(subpat_names);
! 647: efree(offsets);
! 648: if (match_sets) efree(match_sets);
! 649: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
! 650: RETURN_FALSE;
! 651: }
! 652:
! 653: if (global) { /* global pattern matching */
! 654: if (subpats_order == PREG_PATTERN_ORDER) {
! 655: /* For each subpattern, insert it into the appropriate array. */
! 656: for (i = 0; i < count; i++) {
! 657: if (offset_capture) {
! 658: add_offset_pair(match_sets[i], (char *)stringlist[i],
! 659: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
! 660: } else {
! 661: add_next_index_stringl(match_sets[i], (char *)stringlist[i],
! 662: offsets[(i<<1)+1] - offsets[i<<1], 1);
! 663: }
! 664: }
! 665: /*
! 666: * If the number of captured subpatterns on this run is
! 667: * less than the total possible number, pad the result
! 668: * arrays with empty strings.
! 669: */
! 670: if (count < num_subpats) {
! 671: for (; i < num_subpats; i++) {
! 672: add_next_index_string(match_sets[i], "", 1);
! 673: }
! 674: }
! 675: } else {
! 676: /* Allocate the result set array */
! 677: ALLOC_ZVAL(result_set);
! 678: array_init(result_set);
! 679: INIT_PZVAL(result_set);
! 680:
! 681: /* Add all the subpatterns to it */
! 682: for (i = 0; i < count; i++) {
! 683: if (offset_capture) {
! 684: add_offset_pair(result_set, (char *)stringlist[i],
! 685: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
! 686: } else {
! 687: if (subpat_names[i]) {
! 688: add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
! 689: offsets[(i<<1)+1] - offsets[i<<1], 1);
! 690: }
! 691: add_next_index_stringl(result_set, (char *)stringlist[i],
! 692: offsets[(i<<1)+1] - offsets[i<<1], 1);
! 693: }
! 694: }
! 695: /* And add it to the output array */
! 696: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
! 697: }
! 698: } else { /* single pattern matching */
! 699: /* For each subpattern, insert it into the subpatterns array. */
! 700: for (i = 0; i < count; i++) {
! 701: if (offset_capture) {
! 702: add_offset_pair(subpats, (char *)stringlist[i],
! 703: offsets[(i<<1)+1] - offsets[i<<1],
! 704: offsets[i<<1], subpat_names[i]);
! 705: } else {
! 706: if (subpat_names[i]) {
! 707: add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
! 708: offsets[(i<<1)+1] - offsets[i<<1], 1);
! 709: }
! 710: add_next_index_stringl(subpats, (char *)stringlist[i],
! 711: offsets[(i<<1)+1] - offsets[i<<1], 1);
! 712: }
! 713: }
! 714: }
! 715:
! 716: pcre_free((void *) stringlist);
! 717: }
! 718: } else if (count == PCRE_ERROR_NOMATCH) {
! 719: /* If we previously set PCRE_NOTEMPTY after a null match,
! 720: this is not necessarily the end. We need to advance
! 721: the start offset, and continue. Fudge the offset values
! 722: to achieve this, unless we're already at the end of the string. */
! 723: if (g_notempty != 0 && start_offset < subject_len) {
! 724: offsets[0] = start_offset;
! 725: offsets[1] = start_offset + 1;
! 726: } else
! 727: break;
! 728: } else {
! 729: pcre_handle_exec_error(count TSRMLS_CC);
! 730: break;
! 731: }
! 732:
! 733: /* If we have matched an empty string, mimic what Perl's /g options does.
! 734: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
! 735: the match again at the same point. If this fails (picked up above) we
! 736: advance to the next character. */
! 737: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
! 738:
! 739: /* Advance to the position right after the last full match */
! 740: start_offset = offsets[1];
! 741: } while (global);
! 742:
! 743: /* Add the match sets to the output array and clean up */
! 744: if (global && subpats_order == PREG_PATTERN_ORDER) {
! 745: for (i = 0; i < num_subpats; i++) {
! 746: if (subpat_names[i]) {
! 747: zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
! 748: strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
! 749: Z_ADDREF_P(match_sets[i]);
! 750: }
! 751: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
! 752: }
! 753: efree(match_sets);
! 754: }
! 755:
! 756: efree(offsets);
! 757: efree(subpat_names);
! 758:
! 759: /* Did we encounter an error? */
! 760: if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
! 761: RETVAL_LONG(matched);
! 762: } else {
! 763: RETVAL_FALSE;
! 764: }
! 765: }
! 766: /* }}} */
! 767:
! 768: /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
! 769: Perform a Perl-style regular expression match */
! 770: static PHP_FUNCTION(preg_match)
! 771: {
! 772: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
! 773: }
! 774: /* }}} */
! 775:
! 776: /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]])
! 777: Perform a Perl-style global regular expression match */
! 778: static PHP_FUNCTION(preg_match_all)
! 779: {
! 780: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
! 781: }
! 782: /* }}} */
! 783:
/* {{{ preg_get_backref
 * Try to read a back-reference at *str. The first character is taken to be
 * the marker (the caller checks for '\\' or '$'); it must be followed by one
 * or two decimal digits, optionally written as ${N} / ${NN} when the marker
 * is '$'.
 *
 * On success returns 1, stores the number in *backref and moves *str past the
 * reference. On failure returns 0 and leaves *str alone; *backref may already
 * have been written when a braced reference lacks its closing '}'.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cur = *str;
	int braced;

	/* A marker that is the last character cannot start a reference. */
	if (cur[1] == '\0') {
		return 0;
	}

	/* Step over the marker, and over the opening brace of the ${N} form. */
	braced = (cur[0] == '$' && cur[1] == '{');
	cur += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cur < '0' || *cur > '9') {
		return 0;
	}
	*backref = *cur - '0';
	cur++;

	/* Second digit is optional. */
	if (*cur >= '0' && *cur <= '9') {
		*backref = *backref * 10 + *cur - '0';
		cur++;
	}

	/* The braced form has to be closed. */
	if (braced) {
		if (*cur != '}') {
			return 0;
		}
		cur++;
	}

	*str = cur;
	return 1;
}
/* }}} */
! 822:
! 823: /* {{{ preg_do_repl_func
! 824: */
! 825: static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
! 826: {
! 827: zval *retval_ptr; /* Function return value */
! 828: zval **args[1]; /* Argument to pass to function */
! 829: zval *subpats; /* Captured subpatterns */
! 830: int result_len; /* Return value length */
! 831: int i;
! 832:
! 833: MAKE_STD_ZVAL(subpats);
! 834: array_init(subpats);
! 835: for (i = 0; i < count; i++) {
! 836: if (subpat_names[i]) {
! 837: add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
! 838: }
! 839: add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
! 840: }
! 841: args[0] = &subpats;
! 842:
! 843: if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
! 844: convert_to_string_ex(&retval_ptr);
! 845: *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
! 846: result_len = Z_STRLEN_P(retval_ptr);
! 847: zval_ptr_dtor(&retval_ptr);
! 848: } else {
! 849: if (!EG(exception)) {
! 850: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
! 851: }
! 852: result_len = offsets[1] - offsets[0];
! 853: *result = estrndup(&subject[offsets[0]], result_len);
! 854: }
! 855:
! 856: zval_ptr_dtor(&subpats);
! 857:
! 858: return result_len;
! 859: }
! 860: /* }}} */
! 861:
! 862: /* {{{ preg_do_eval
! 863: */
! 864: static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
! 865: int *offsets, int count, char **result TSRMLS_DC)
! 866: {
! 867: zval retval; /* Return value from evaluation */
! 868: char *eval_str_end, /* End of eval string */
! 869: *match, /* Current match for a backref */
! 870: *esc_match, /* Quote-escaped match */
! 871: *walk, /* Used to walk the code string */
! 872: *segment, /* Start of segment to append while walking */
! 873: walk_last; /* Last walked character */
! 874: int match_len; /* Length of the match */
! 875: int esc_match_len; /* Length of the quote-escaped match */
! 876: int result_len; /* Length of the result of the evaluation */
! 877: int backref; /* Current backref */
! 878: char *compiled_string_description;
! 879: smart_str code = {0};
! 880:
! 881: eval_str_end = eval_str + eval_str_len;
! 882: walk = segment = eval_str;
! 883: walk_last = 0;
! 884:
! 885: while (walk < eval_str_end) {
! 886: /* If found a backreference.. */
! 887: if ('\\' == *walk || '$' == *walk) {
! 888: smart_str_appendl(&code, segment, walk - segment);
! 889: if (walk_last == '\\') {
! 890: code.c[code.len-1] = *walk++;
! 891: segment = walk;
! 892: walk_last = 0;
! 893: continue;
! 894: }
! 895: segment = walk;
! 896: if (preg_get_backref(&walk, &backref)) {
! 897: if (backref < count) {
! 898: /* Find the corresponding string match and substitute it
! 899: in instead of the backref */
! 900: match = subject + offsets[backref<<1];
! 901: match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
! 902: if (match_len) {
! 903: esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
! 904: } else {
! 905: esc_match = match;
! 906: esc_match_len = 0;
! 907: }
! 908: } else {
! 909: esc_match = "";
! 910: esc_match_len = 0;
! 911: }
! 912: smart_str_appendl(&code, esc_match, esc_match_len);
! 913:
! 914: segment = walk;
! 915:
! 916: /* Clean up and reassign */
! 917: if (esc_match_len)
! 918: efree(esc_match);
! 919: continue;
! 920: }
! 921: }
! 922: walk++;
! 923: walk_last = walk[-1];
! 924: }
! 925: smart_str_appendl(&code, segment, walk - segment);
! 926: smart_str_0(&code);
! 927:
! 928: compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
! 929: /* Run the code */
! 930: if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
! 931: efree(compiled_string_description);
! 932: php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
! 933: /* zend_error() does not return in this case */
! 934: }
! 935: efree(compiled_string_description);
! 936: convert_to_string(&retval);
! 937:
! 938: /* Save the return value and its length */
! 939: *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
! 940: result_len = Z_STRLEN(retval);
! 941:
! 942: /* Clean up */
! 943: zval_dtor(&retval);
! 944: smart_str_free(&code);
! 945:
! 946: return result_len;
! 947: }
! 948: /* }}} */
! 949:
! 950: /* {{{ php_pcre_replace
! 951: */
! 952: PHPAPI char *php_pcre_replace(char *regex, int regex_len,
! 953: char *subject, int subject_len,
! 954: zval *replace_val, int is_callable_replace,
! 955: int *result_len, int limit, int *replace_count TSRMLS_DC)
! 956: {
! 957: pcre_cache_entry *pce; /* Compiled regular expression */
! 958:
! 959: /* Compile regex or get it from cache. */
! 960: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
! 961: return NULL;
! 962: }
! 963:
! 964: return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
! 965: is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
! 966: }
! 967: /* }}} */
! 968:
! 969: /* {{{ php_pcre_replace_impl() */
! 970: PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
! 971: int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
! 972: {
! 973: pcre_extra *extra = pce->extra;/* Holds results of studying */
! 974: pcre_extra extra_data; /* Used locally for exec options */
! 975: int exoptions = 0; /* Execution options */
! 976: int count = 0; /* Count of matched subpatterns */
! 977: int *offsets; /* Array of subpattern offsets */
! 978: char **subpat_names; /* Array for named subpatterns */
! 979: int num_subpats; /* Number of captured subpatterns */
! 980: int size_offsets; /* Size of the offsets array */
! 981: int new_len; /* Length of needed storage */
! 982: int alloc_len; /* Actual allocated length */
! 983: int eval_result_len=0; /* Length of the eval'ed or
! 984: function-returned string */
! 985: int match_len; /* Length of the current match */
! 986: int backref; /* Backreference number */
! 987: int eval; /* If the replacement string should be eval'ed */
! 988: int start_offset; /* Where the new search starts */
! 989: int g_notempty=0; /* If the match should not be empty */
! 990: int replace_len=0; /* Length of replacement string */
! 991: char *result, /* Result of replacement */
! 992: *replace=NULL, /* Replacement string */
! 993: *new_buf, /* Temporary buffer for re-allocation */
! 994: *walkbuf, /* Location of current replacement in the result */
! 995: *walk, /* Used to walk the replacement string */
! 996: *match, /* The current match */
! 997: *piece, /* The current piece of subject */
! 998: *replace_end=NULL, /* End of replacement string */
! 999: *eval_result, /* Result of eval or custom function */
! 1000: walk_last; /* Last walked character */
! 1001: int rc;
! 1002:
! 1003: if (extra == NULL) {
! 1004: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
! 1005: extra = &extra_data;
! 1006: }
! 1007: extra->match_limit = PCRE_G(backtrack_limit);
! 1008: extra->match_limit_recursion = PCRE_G(recursion_limit);
! 1009:
! 1010: eval = pce->preg_options & PREG_REPLACE_EVAL;
! 1011: if (is_callable_replace) {
! 1012: if (eval) {
! 1013: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
! 1014: return NULL;
! 1015: }
! 1016: } else {
! 1017: replace = Z_STRVAL_P(replace_val);
! 1018: replace_len = Z_STRLEN_P(replace_val);
! 1019: replace_end = replace + replace_len;
! 1020: }
! 1021:
! 1022: /* Calculate the size of the offsets array, and allocate memory for it. */
! 1023: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
! 1024: if (rc < 0) {
! 1025: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
! 1026: return NULL;
! 1027: }
! 1028: num_subpats++;
! 1029: size_offsets = num_subpats * 3;
! 1030:
! 1031: /*
! 1032: * Build a mapping from subpattern numbers to their names. We will always
! 1033: * allocate the table, even though there may be no named subpatterns. This
! 1034: * avoids somewhat more complicated logic in the inner loops.
! 1035: */
! 1036: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
! 1037: if (!subpat_names) {
! 1038: return NULL;
! 1039: }
! 1040:
! 1041: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
! 1042:
! 1043: alloc_len = 2 * subject_len + 1;
! 1044: result = safe_emalloc(alloc_len, sizeof(char), 0);
! 1045:
! 1046: /* Initialize */
! 1047: match = NULL;
! 1048: *result_len = 0;
! 1049: start_offset = 0;
! 1050: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
! 1051:
! 1052: while (1) {
! 1053: /* Execute the regular expression. */
! 1054: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
! 1055: exoptions|g_notempty, offsets, size_offsets);
! 1056:
! 1057: /* the string was already proved to be valid UTF-8 */
! 1058: exoptions |= PCRE_NO_UTF8_CHECK;
! 1059:
! 1060: /* Check for too many substrings condition. */
! 1061: if (count == 0) {
! 1062: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
! 1063: count = size_offsets/3;
! 1064: }
! 1065:
! 1066: piece = subject + start_offset;
! 1067:
! 1068: if (count > 0 && (limit == -1 || limit > 0)) {
! 1069: if (replace_count) {
! 1070: ++*replace_count;
! 1071: }
! 1072: /* Set the match location in subject */
! 1073: match = subject + offsets[0];
! 1074:
! 1075: new_len = *result_len + offsets[0] - start_offset; /* part before the match */
! 1076:
! 1077: /* If evaluating, do it and add the return string's length */
! 1078: if (eval) {
! 1079: eval_result_len = preg_do_eval(replace, replace_len, subject,
! 1080: offsets, count, &eval_result TSRMLS_CC);
! 1081: new_len += eval_result_len;
! 1082: } else if (is_callable_replace) {
! 1083: /* Use custom function to get replacement string and its length. */
! 1084: eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
! 1085: new_len += eval_result_len;
! 1086: } else { /* do regular substitution */
! 1087: walk = replace;
! 1088: walk_last = 0;
! 1089: while (walk < replace_end) {
! 1090: if ('\\' == *walk || '$' == *walk) {
! 1091: if (walk_last == '\\') {
! 1092: walk++;
! 1093: walk_last = 0;
! 1094: continue;
! 1095: }
! 1096: if (preg_get_backref(&walk, &backref)) {
! 1097: if (backref < count)
! 1098: new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
! 1099: continue;
! 1100: }
! 1101: }
! 1102: new_len++;
! 1103: walk++;
! 1104: walk_last = walk[-1];
! 1105: }
! 1106: }
! 1107:
! 1108: if (new_len + 1 > alloc_len) {
! 1109: alloc_len = 1 + alloc_len + 2 * new_len;
! 1110: new_buf = emalloc(alloc_len);
! 1111: memcpy(new_buf, result, *result_len);
! 1112: efree(result);
! 1113: result = new_buf;
! 1114: }
! 1115: /* copy the part of the string before the match */
! 1116: memcpy(&result[*result_len], piece, match-piece);
! 1117: *result_len += match-piece;
! 1118:
! 1119: /* copy replacement and backrefs */
! 1120: walkbuf = result + *result_len;
! 1121:
! 1122: /* If evaluating or using custom function, copy result to the buffer
! 1123: * and clean up. */
! 1124: if (eval || is_callable_replace) {
! 1125: memcpy(walkbuf, eval_result, eval_result_len);
! 1126: *result_len += eval_result_len;
! 1127: STR_FREE(eval_result);
! 1128: } else { /* do regular backreference copying */
! 1129: walk = replace;
! 1130: walk_last = 0;
! 1131: while (walk < replace_end) {
! 1132: if ('\\' == *walk || '$' == *walk) {
! 1133: if (walk_last == '\\') {
! 1134: *(walkbuf-1) = *walk++;
! 1135: walk_last = 0;
! 1136: continue;
! 1137: }
! 1138: if (preg_get_backref(&walk, &backref)) {
! 1139: if (backref < count) {
! 1140: match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
! 1141: memcpy(walkbuf, subject + offsets[backref<<1], match_len);
! 1142: walkbuf += match_len;
! 1143: }
! 1144: continue;
! 1145: }
! 1146: }
! 1147: *walkbuf++ = *walk++;
! 1148: walk_last = walk[-1];
! 1149: }
! 1150: *walkbuf = '\0';
! 1151: /* increment the result length by how much we've added to the string */
! 1152: *result_len += walkbuf - (result + *result_len);
! 1153: }
! 1154:
! 1155: if (limit != -1)
! 1156: limit--;
! 1157:
! 1158: } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
! 1159: /* If we previously set PCRE_NOTEMPTY after a null match,
! 1160: this is not necessarily the end. We need to advance
! 1161: the start offset, and continue. Fudge the offset values
! 1162: to achieve this, unless we're already at the end of the string. */
! 1163: if (g_notempty != 0 && start_offset < subject_len) {
! 1164: offsets[0] = start_offset;
! 1165: offsets[1] = start_offset + 1;
! 1166: memcpy(&result[*result_len], piece, 1);
! 1167: (*result_len)++;
! 1168: } else {
! 1169: new_len = *result_len + subject_len - start_offset;
! 1170: if (new_len + 1 > alloc_len) {
! 1171: alloc_len = new_len + 1; /* now we know exactly how long it is */
! 1172: new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
! 1173: memcpy(new_buf, result, *result_len);
! 1174: efree(result);
! 1175: result = new_buf;
! 1176: }
! 1177: /* stick that last bit of string on our output */
! 1178: memcpy(&result[*result_len], piece, subject_len - start_offset);
! 1179: *result_len += subject_len - start_offset;
! 1180: result[*result_len] = '\0';
! 1181: break;
! 1182: }
! 1183: } else {
! 1184: pcre_handle_exec_error(count TSRMLS_CC);
! 1185: efree(result);
! 1186: result = NULL;
! 1187: break;
! 1188: }
! 1189:
! 1190: /* If we have matched an empty string, mimic what Perl's /g options does.
! 1191: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
! 1192: the match again at the same point. If this fails (picked up above) we
! 1193: advance to the next character. */
! 1194: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
! 1195:
! 1196: /* Advance to the next piece. */
! 1197: start_offset = offsets[1];
! 1198: }
! 1199:
! 1200: efree(offsets);
! 1201: efree(subpat_names);
! 1202:
! 1203: return result;
! 1204: }
! 1205: /* }}} */
! 1206:
! 1207: /* {{{ php_replace_in_subject
! 1208: */
! 1209: static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
! 1210: {
! 1211: zval **regex_entry,
! 1212: **replace_entry = NULL,
! 1213: *replace_value,
! 1214: empty_replace;
! 1215: char *subject_value,
! 1216: *result;
! 1217: int subject_len;
! 1218:
! 1219: /* Make sure we're dealing with strings. */
! 1220: convert_to_string_ex(subject);
! 1221: /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
! 1222: ZVAL_STRINGL(&empty_replace, "", 0, 0);
! 1223:
! 1224: /* If regex is an array */
! 1225: if (Z_TYPE_P(regex) == IS_ARRAY) {
! 1226: /* Duplicate subject string for repeated replacement */
! 1227: subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
! 1228: subject_len = Z_STRLEN_PP(subject);
! 1229: *result_len = subject_len;
! 1230:
! 1231: zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
! 1232:
! 1233: replace_value = replace;
! 1234: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
! 1235: zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
! 1236:
! 1237: /* For each entry in the regex array, get the entry */
! 1238: while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
! 1239: /* Make sure we're dealing with strings. */
! 1240: convert_to_string_ex(regex_entry);
! 1241:
! 1242: /* If replace is an array and not a callable construct */
! 1243: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
! 1244: /* Get current entry */
! 1245: if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
! 1246: if (!is_callable_replace) {
! 1247: convert_to_string_ex(replace_entry);
! 1248: }
! 1249: replace_value = *replace_entry;
! 1250: zend_hash_move_forward(Z_ARRVAL_P(replace));
! 1251: } else {
! 1252: /* We've run out of replacement strings, so use an empty one */
! 1253: replace_value = &empty_replace;
! 1254: }
! 1255: }
! 1256:
! 1257: /* Do the actual replacement and put the result back into subject_value
! 1258: for further replacements. */
! 1259: if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
! 1260: Z_STRLEN_PP(regex_entry),
! 1261: subject_value,
! 1262: subject_len,
! 1263: replace_value,
! 1264: is_callable_replace,
! 1265: result_len,
! 1266: limit,
! 1267: replace_count TSRMLS_CC)) != NULL) {
! 1268: efree(subject_value);
! 1269: subject_value = result;
! 1270: subject_len = *result_len;
! 1271: } else {
! 1272: efree(subject_value);
! 1273: return NULL;
! 1274: }
! 1275:
! 1276: zend_hash_move_forward(Z_ARRVAL_P(regex));
! 1277: }
! 1278:
! 1279: return subject_value;
! 1280: } else {
! 1281: result = php_pcre_replace(Z_STRVAL_P(regex),
! 1282: Z_STRLEN_P(regex),
! 1283: Z_STRVAL_PP(subject),
! 1284: Z_STRLEN_PP(subject),
! 1285: replace,
! 1286: is_callable_replace,
! 1287: result_len,
! 1288: limit,
! 1289: replace_count TSRMLS_CC);
! 1290: return result;
! 1291: }
! 1292: }
! 1293: /* }}} */
! 1294:
! 1295: /* {{{ preg_replace_impl
! 1296: */
! 1297: static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
! 1298: {
! 1299: zval **regex,
! 1300: **replace,
! 1301: **subject,
! 1302: **subject_entry,
! 1303: **zcount = NULL;
! 1304: char *result;
! 1305: int result_len;
! 1306: int limit_val = -1;
! 1307: long limit = -1;
! 1308: char *string_key;
! 1309: ulong num_key;
! 1310: char *callback_name;
! 1311: int replace_count=0, old_replace_count;
! 1312:
! 1313: /* Get function parameters and do error-checking. */
! 1314: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
! 1315: return;
! 1316: }
! 1317:
! 1318: if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
! 1319: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
! 1320: RETURN_FALSE;
! 1321: }
! 1322:
! 1323: SEPARATE_ZVAL(replace);
! 1324: if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
! 1325: convert_to_string_ex(replace);
! 1326: }
! 1327: if (is_callable_replace) {
! 1328: if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
! 1329: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
! 1330: efree(callback_name);
! 1331: MAKE_COPY_ZVAL(subject, return_value);
! 1332: return;
! 1333: }
! 1334: efree(callback_name);
! 1335: }
! 1336:
! 1337: SEPARATE_ZVAL(regex);
! 1338: SEPARATE_ZVAL(subject);
! 1339:
! 1340: if (ZEND_NUM_ARGS() > 3) {
! 1341: limit_val = limit;
! 1342: }
! 1343:
! 1344: if (Z_TYPE_PP(regex) != IS_ARRAY)
! 1345: convert_to_string_ex(regex);
! 1346:
! 1347: /* if subject is an array */
! 1348: if (Z_TYPE_PP(subject) == IS_ARRAY) {
! 1349: array_init(return_value);
! 1350: zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
! 1351:
! 1352: /* For each subject entry, convert it to string, then perform replacement
! 1353: and add the result to the return_value array. */
! 1354: while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
! 1355: SEPARATE_ZVAL(subject_entry);
! 1356: old_replace_count = replace_count;
! 1357: if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
! 1358: if (!is_filter || replace_count > old_replace_count) {
! 1359: /* Add to return array */
! 1360: switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
! 1361: {
! 1362: case HASH_KEY_IS_STRING:
! 1363: add_assoc_stringl(return_value, string_key, result, result_len, 0);
! 1364: break;
! 1365:
! 1366: case HASH_KEY_IS_LONG:
! 1367: add_index_stringl(return_value, num_key, result, result_len, 0);
! 1368: break;
! 1369: }
! 1370: } else {
! 1371: efree(result);
! 1372: }
! 1373: }
! 1374:
! 1375: zend_hash_move_forward(Z_ARRVAL_PP(subject));
! 1376: }
! 1377: } else { /* if subject is not an array */
! 1378: old_replace_count = replace_count;
! 1379: if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
! 1380: if (!is_filter || replace_count > old_replace_count) {
! 1381: RETVAL_STRINGL(result, result_len, 0);
! 1382: } else {
! 1383: efree(result);
! 1384: }
! 1385: }
! 1386: }
! 1387: if (ZEND_NUM_ARGS() > 4) {
! 1388: zval_dtor(*zcount);
! 1389: ZVAL_LONG(*zcount, replace_count);
! 1390: }
! 1391:
! 1392: }
! 1393: /* }}} */
! 1394:
! 1395: /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
! 1396: Perform Perl-style regular expression replacement. */
! 1397: static PHP_FUNCTION(preg_replace)
! 1398: {
! 1399: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
! 1400: }
! 1401: /* }}} */
! 1402:
! 1403: /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
! 1404: Perform Perl-style regular expression replacement using replacement callback. */
! 1405: static PHP_FUNCTION(preg_replace_callback)
! 1406: {
! 1407: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
! 1408: }
! 1409: /* }}} */
! 1410:
! 1411: /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
! 1412: Perform Perl-style regular expression replacement and only return matches. */
! 1413: static PHP_FUNCTION(preg_filter)
! 1414: {
! 1415: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
! 1416: }
! 1417: /* }}} */
! 1418:
! 1419: /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
! 1420: Split string into an array using a perl-style regular expression as a delimiter */
! 1421: static PHP_FUNCTION(preg_split)
! 1422: {
! 1423: char *regex; /* Regular expression */
! 1424: char *subject; /* String to match against */
! 1425: int regex_len;
! 1426: int subject_len;
! 1427: long limit_val = -1;/* Integer value of limit */
! 1428: long flags = 0; /* Match control flags */
! 1429: pcre_cache_entry *pce; /* Compiled regular expression */
! 1430:
! 1431: /* Get function parameters and do error checking */
! 1432: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
! 1433: &subject, &subject_len, &limit_val, &flags) == FAILURE) {
! 1434: RETURN_FALSE;
! 1435: }
! 1436:
! 1437: /* Compile regex or get it from cache. */
! 1438: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
! 1439: RETURN_FALSE;
! 1440: }
! 1441:
! 1442: php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
! 1443: }
! 1444: /* }}} */
! 1445:
! 1446: /* {{{ php_pcre_split
! 1447: */
! 1448: PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
! 1449: long limit_val, long flags TSRMLS_DC)
! 1450: {
! 1451: pcre_extra *extra = NULL; /* Holds results of studying */
! 1452: pcre *re_bump = NULL; /* Regex instance for empty matches */
! 1453: pcre_extra *extra_bump = NULL; /* Almost dummy */
! 1454: pcre_extra extra_data; /* Used locally for exec options */
! 1455: int *offsets; /* Array of subpattern offsets */
! 1456: int size_offsets; /* Size of the offsets array */
! 1457: int exoptions = 0; /* Execution options */
! 1458: int count = 0; /* Count of matched subpatterns */
! 1459: int start_offset; /* Where the new search starts */
! 1460: int next_offset; /* End of the last delimiter match + 1 */
! 1461: int g_notempty = 0; /* If the match should not be empty */
! 1462: char *last_match; /* Location of last match */
! 1463: int rc;
! 1464: int no_empty; /* If NO_EMPTY flag is set */
! 1465: int delim_capture; /* If delimiters should be captured */
! 1466: int offset_capture; /* If offsets should be captured */
! 1467:
! 1468: no_empty = flags & PREG_SPLIT_NO_EMPTY;
! 1469: delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
! 1470: offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
! 1471:
! 1472: if (limit_val == 0) {
! 1473: limit_val = -1;
! 1474: }
! 1475:
! 1476: if (extra == NULL) {
! 1477: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
! 1478: extra = &extra_data;
! 1479: }
! 1480: extra->match_limit = PCRE_G(backtrack_limit);
! 1481: extra->match_limit_recursion = PCRE_G(recursion_limit);
! 1482:
! 1483: /* Initialize return value */
! 1484: array_init(return_value);
! 1485:
! 1486: /* Calculate the size of the offsets array, and allocate memory for it. */
! 1487: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
! 1488: if (rc < 0) {
! 1489: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
! 1490: RETURN_FALSE;
! 1491: }
! 1492: size_offsets = (size_offsets + 1) * 3;
! 1493: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
! 1494:
! 1495: /* Start at the beginning of the string */
! 1496: start_offset = 0;
! 1497: next_offset = 0;
! 1498: last_match = subject;
! 1499: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
! 1500:
! 1501: /* Get next piece if no limit or limit not yet reached and something matched*/
! 1502: while ((limit_val == -1 || limit_val > 1)) {
! 1503: count = pcre_exec(pce->re, extra, subject,
! 1504: subject_len, start_offset,
! 1505: exoptions|g_notempty, offsets, size_offsets);
! 1506:
! 1507: /* the string was already proved to be valid UTF-8 */
! 1508: exoptions |= PCRE_NO_UTF8_CHECK;
! 1509:
! 1510: /* Check for too many substrings condition. */
! 1511: if (count == 0) {
! 1512: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
! 1513: count = size_offsets/3;
! 1514: }
! 1515:
! 1516: /* If something matched */
! 1517: if (count > 0) {
! 1518: if (!no_empty || &subject[offsets[0]] != last_match) {
! 1519:
! 1520: if (offset_capture) {
! 1521: /* Add (match, offset) pair to the return value */
! 1522: add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
! 1523: } else {
! 1524: /* Add the piece to the return value */
! 1525: add_next_index_stringl(return_value, last_match,
! 1526: &subject[offsets[0]]-last_match, 1);
! 1527: }
! 1528:
! 1529: /* One less left to do */
! 1530: if (limit_val != -1)
! 1531: limit_val--;
! 1532: }
! 1533:
! 1534: last_match = &subject[offsets[1]];
! 1535: next_offset = offsets[1];
! 1536:
! 1537: if (delim_capture) {
! 1538: int i, match_len;
! 1539: for (i = 1; i < count; i++) {
! 1540: match_len = offsets[(i<<1)+1] - offsets[i<<1];
! 1541: /* If we have matched a delimiter */
! 1542: if (!no_empty || match_len > 0) {
! 1543: if (offset_capture) {
! 1544: add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
! 1545: } else {
! 1546: add_next_index_stringl(return_value,
! 1547: &subject[offsets[i<<1]],
! 1548: match_len, 1);
! 1549: }
! 1550: }
! 1551: }
! 1552: }
! 1553: } else if (count == PCRE_ERROR_NOMATCH) {
! 1554: /* If we previously set PCRE_NOTEMPTY after a null match,
! 1555: this is not necessarily the end. We need to advance
! 1556: the start offset, and continue. Fudge the offset values
! 1557: to achieve this, unless we're already at the end of the string. */
! 1558: if (g_notempty != 0 && start_offset < subject_len) {
! 1559: if (pce->compile_options & PCRE_UTF8) {
! 1560: if (re_bump == NULL) {
! 1561: int dummy;
! 1562:
! 1563: if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
! 1564: RETURN_FALSE;
! 1565: }
! 1566: }
! 1567: count = pcre_exec(re_bump, extra_bump, subject,
! 1568: subject_len, start_offset,
! 1569: exoptions, offsets, size_offsets);
! 1570: if (count < 1) {
! 1571: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
! 1572: RETURN_FALSE;
! 1573: }
! 1574: } else {
! 1575: offsets[0] = start_offset;
! 1576: offsets[1] = start_offset + 1;
! 1577: }
! 1578: } else
! 1579: break;
! 1580: } else {
! 1581: pcre_handle_exec_error(count TSRMLS_CC);
! 1582: break;
! 1583: }
! 1584:
! 1585: /* If we have matched an empty string, mimic what Perl's /g options does.
! 1586: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
! 1587: the match again at the same point. If this fails (picked up above) we
! 1588: advance to the next character. */
! 1589: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
! 1590:
! 1591: /* Advance to the position right after the last full match */
! 1592: start_offset = offsets[1];
! 1593: }
! 1594:
! 1595:
! 1596: start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
! 1597:
! 1598: if (!no_empty || start_offset < subject_len)
! 1599: {
! 1600: if (offset_capture) {
! 1601: /* Add the last (match, offset) pair to the return value */
! 1602: add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
! 1603: } else {
! 1604: /* Add the last piece to the return value */
! 1605: add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
! 1606: }
! 1607: }
! 1608:
! 1609:
! 1610: /* Clean up */
! 1611: efree(offsets);
! 1612: }
! 1613: /* }}} */
! 1614:
! 1615: /* {{{ proto string preg_quote(string str [, string delim_char])
! 1616: Quote regular expression characters plus an optional character */
! 1617: static PHP_FUNCTION(preg_quote)
! 1618: {
! 1619: int in_str_len;
! 1620: char *in_str; /* Input string argument */
! 1621: char *in_str_end; /* End of the input string */
! 1622: int delim_len = 0;
! 1623: char *delim = NULL; /* Additional delimiter argument */
! 1624: char *out_str, /* Output string with quoted characters */
! 1625: *p, /* Iterator for input string */
! 1626: *q, /* Iterator for output string */
! 1627: delim_char=0, /* Delimiter character to be quoted */
! 1628: c; /* Current character */
! 1629: zend_bool quote_delim = 0; /* Whether to quote additional delim char */
! 1630:
! 1631: /* Get the arguments and check for errors */
! 1632: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
! 1633: &delim, &delim_len) == FAILURE) {
! 1634: return;
! 1635: }
! 1636:
! 1637: in_str_end = in_str + in_str_len;
! 1638:
! 1639: /* Nothing to do if we got an empty string */
! 1640: if (in_str == in_str_end) {
! 1641: RETURN_EMPTY_STRING();
! 1642: }
! 1643:
! 1644: if (delim && *delim) {
! 1645: delim_char = delim[0];
! 1646: quote_delim = 1;
! 1647: }
! 1648:
! 1649: /* Allocate enough memory so that even if each character
! 1650: is quoted, we won't run out of room */
! 1651: out_str = safe_emalloc(4, in_str_len, 1);
! 1652:
! 1653: /* Go through the string and quote necessary characters */
! 1654: for(p = in_str, q = out_str; p != in_str_end; p++) {
! 1655: c = *p;
! 1656: switch(c) {
! 1657: case '.':
! 1658: case '\\':
! 1659: case '+':
! 1660: case '*':
! 1661: case '?':
! 1662: case '[':
! 1663: case '^':
! 1664: case ']':
! 1665: case '$':
! 1666: case '(':
! 1667: case ')':
! 1668: case '{':
! 1669: case '}':
! 1670: case '=':
! 1671: case '!':
! 1672: case '>':
! 1673: case '<':
! 1674: case '|':
! 1675: case ':':
! 1676: case '-':
! 1677: *q++ = '\\';
! 1678: *q++ = c;
! 1679: break;
! 1680:
! 1681: case '\0':
! 1682: *q++ = '\\';
! 1683: *q++ = '0';
! 1684: *q++ = '0';
! 1685: *q++ = '0';
! 1686: break;
! 1687:
! 1688: default:
! 1689: if (quote_delim && c == delim_char)
! 1690: *q++ = '\\';
! 1691: *q++ = c;
! 1692: break;
! 1693: }
! 1694: }
! 1695: *q = '\0';
! 1696:
! 1697: /* Reallocate string and return it */
! 1698: RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
! 1699: }
! 1700: /* }}} */
! 1701:
! 1702: /* {{{ proto array preg_grep(string regex, array input [, int flags])
! 1703: Searches array and returns entries which match regex */
! 1704: static PHP_FUNCTION(preg_grep)
! 1705: {
! 1706: char *regex; /* Regular expression */
! 1707: int regex_len;
! 1708: zval *input; /* Input array */
! 1709: long flags = 0; /* Match control flags */
! 1710: pcre_cache_entry *pce; /* Compiled regular expression */
! 1711:
! 1712: /* Get arguments and do error checking */
! 1713: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
! 1714: &input, &flags) == FAILURE) {
! 1715: return;
! 1716: }
! 1717:
! 1718: /* Compile regex or get it from cache. */
! 1719: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
! 1720: RETURN_FALSE;
! 1721: }
! 1722:
! 1723: php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
! 1724: }
! 1725: /* }}} */
! 1726:
! 1727: PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
! 1728: {
! 1729: zval **entry; /* An entry in the input array */
! 1730: pcre_extra *extra = pce->extra;/* Holds results of studying */
! 1731: pcre_extra extra_data; /* Used locally for exec options */
! 1732: int *offsets; /* Array of subpattern offsets */
! 1733: int size_offsets; /* Size of the offsets array */
! 1734: int count = 0; /* Count of matched subpatterns */
! 1735: char *string_key;
! 1736: ulong num_key;
! 1737: zend_bool invert; /* Whether to return non-matching
! 1738: entries */
! 1739: int rc;
! 1740:
! 1741: invert = flags & PREG_GREP_INVERT ? 1 : 0;
! 1742:
! 1743: if (extra == NULL) {
! 1744: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
! 1745: extra = &extra_data;
! 1746: }
! 1747: extra->match_limit = PCRE_G(backtrack_limit);
! 1748: extra->match_limit_recursion = PCRE_G(recursion_limit);
! 1749:
! 1750: /* Calculate the size of the offsets array, and allocate memory for it. */
! 1751: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
! 1752: if (rc < 0) {
! 1753: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
! 1754: RETURN_FALSE;
! 1755: }
! 1756: size_offsets = (size_offsets + 1) * 3;
! 1757: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
! 1758:
! 1759: /* Initialize return array */
! 1760: array_init(return_value);
! 1761:
! 1762: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
! 1763:
! 1764: /* Go through the input array */
! 1765: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
! 1766: while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
! 1767: zval subject = **entry;
! 1768:
! 1769: if (Z_TYPE_PP(entry) != IS_STRING) {
! 1770: zval_copy_ctor(&subject);
! 1771: convert_to_string(&subject);
! 1772: }
! 1773:
! 1774: /* Perform the match */
! 1775: count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
! 1776: Z_STRLEN(subject), 0,
! 1777: 0, offsets, size_offsets);
! 1778:
! 1779: /* Check for too many substrings condition. */
! 1780: if (count == 0) {
! 1781: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
! 1782: count = size_offsets/3;
! 1783: } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
! 1784: pcre_handle_exec_error(count TSRMLS_CC);
! 1785: break;
! 1786: }
! 1787:
! 1788: /* If the entry fits our requirements */
! 1789: if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
! 1790:
! 1791: Z_ADDREF_PP(entry);
! 1792:
! 1793: /* Add to return array */
! 1794: switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
! 1795: {
! 1796: case HASH_KEY_IS_STRING:
! 1797: zend_hash_update(Z_ARRVAL_P(return_value), string_key,
! 1798: strlen(string_key)+1, entry, sizeof(zval *), NULL);
! 1799: break;
! 1800:
! 1801: case HASH_KEY_IS_LONG:
! 1802: zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
! 1803: sizeof(zval *), NULL);
! 1804: break;
! 1805: }
! 1806: }
! 1807:
! 1808: if (Z_TYPE_PP(entry) != IS_STRING) {
! 1809: zval_dtor(&subject);
! 1810: }
! 1811:
! 1812: zend_hash_move_forward(Z_ARRVAL_P(input));
! 1813: }
! 1814: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
! 1815: /* Clean up */
! 1816: efree(offsets);
! 1817: }
! 1818: /* }}} */
! 1819:
! 1820: /* {{{ proto int preg_last_error()
! 1821: Returns the error code of the last regexp execution. */
! 1822: static PHP_FUNCTION(preg_last_error)
! 1823: {
! 1824: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
! 1825: return;
! 1826: }
! 1827:
! 1828: RETURN_LONG(PCRE_G(error_code));
! 1829: }
! 1830: /* }}} */
! 1831:
! 1832: /* {{{ module definition structures */
! 1833:
! 1834: /* {{{ arginfo -- argument descriptors referenced from pcre_functions[] below.
 * NOTE(review): going by the Zend macro definitions (not visible in this
 * file), the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of
 * required parameters and the first argument of ZEND_ARG_INFO is 1 for a
 * by-reference parameter -- confirm against zend_API.h. */
! 1835: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
! 1836: ZEND_ARG_INFO(0, pattern)
! 1837: ZEND_ARG_INFO(0, subject)
! 1838: ZEND_ARG_INFO(1, subpatterns) /* array, flagged 1 (by reference) */
! 1839: ZEND_ARG_INFO(0, flags)
! 1840: ZEND_ARG_INFO(0, offset)
! 1841: ZEND_END_ARG_INFO()
! 1842: /* Same parameter list as preg_match, but declared with 3 instead of 2. */
! 1843: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3)
! 1844: ZEND_ARG_INFO(0, pattern)
! 1845: ZEND_ARG_INFO(0, subject)
! 1846: ZEND_ARG_INFO(1, subpatterns) /* array, flagged 1 (by reference) */
! 1847: ZEND_ARG_INFO(0, flags)
! 1848: ZEND_ARG_INFO(0, offset)
! 1849: ZEND_END_ARG_INFO()
! 1850: /* This descriptor is also the one pcre_functions[] pairs with preg_filter. */
! 1851: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
! 1852: ZEND_ARG_INFO(0, regex)
! 1853: ZEND_ARG_INFO(0, replace)
! 1854: ZEND_ARG_INFO(0, subject)
! 1855: ZEND_ARG_INFO(0, limit)
! 1856: ZEND_ARG_INFO(1, count) /* flagged 1 (by reference) */
! 1857: ZEND_END_ARG_INFO()
! 1858: /* Mirrors arginfo_preg_replace with `callback` in place of `replace`. */
! 1859: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
! 1860: ZEND_ARG_INFO(0, regex)
! 1861: ZEND_ARG_INFO(0, callback)
! 1862: ZEND_ARG_INFO(0, subject)
! 1863: ZEND_ARG_INFO(0, limit)
! 1864: ZEND_ARG_INFO(1, count) /* flagged 1 (by reference) */
! 1865: ZEND_END_ARG_INFO()
! 1866:
! 1867: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
! 1868: ZEND_ARG_INFO(0, pattern)
! 1869: ZEND_ARG_INFO(0, subject)
! 1870: ZEND_ARG_INFO(0, limit)
! 1871: ZEND_ARG_INFO(0, flags)
! 1872: ZEND_END_ARG_INFO()
! 1873:
! 1874: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
! 1875: ZEND_ARG_INFO(0, str)
! 1876: ZEND_ARG_INFO(0, delim_char)
! 1877: ZEND_END_ARG_INFO()
! 1878:
! 1879: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
! 1880: ZEND_ARG_INFO(0, regex)
! 1881: ZEND_ARG_INFO(0, input) /* array */
! 1882: ZEND_ARG_INFO(0, flags)
! 1883: ZEND_END_ARG_INFO()
! 1884: /* preg_last_error declares no parameters. */
! 1885: ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
! 1886: ZEND_END_ARG_INFO()
! 1887: /* }}} */
! 1888:
! 1889: static const zend_function_entry pcre_functions[] = { /* function table handed to pcre_module_entry; each entry pairs a function with its arginfo */
! 1890: PHP_FE(preg_match, arginfo_preg_match)
! 1891: PHP_FE(preg_match_all, arginfo_preg_match_all)
! 1892: PHP_FE(preg_replace, arginfo_preg_replace)
! 1893: PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
! 1894: PHP_FE(preg_filter, arginfo_preg_replace) /* reuses preg_replace's descriptor */
! 1895: PHP_FE(preg_split, arginfo_preg_split)
! 1896: PHP_FE(preg_quote, arginfo_preg_quote)
! 1897: PHP_FE(preg_grep, arginfo_preg_grep)
! 1898: PHP_FE(preg_last_error, arginfo_preg_last_error)
! 1899: PHP_FE_END /* final entry of the table */
! 1900: };
! 1901:
! 1902: zend_module_entry pcre_module_entry = { /* module descriptor for the "pcre" extension; the initializer is positional */
! 1903: STANDARD_MODULE_HEADER,
! 1904: "pcre", /* extension name */
! 1905: pcre_functions, /* function table defined above */
! 1906: PHP_MINIT(pcre),
! 1907: PHP_MSHUTDOWN(pcre),
! 1908: NULL, /* NOTE(review): presumably the request-startup slot, unused -- confirm against zend_module_entry */
! 1909: NULL, /* NOTE(review): presumably the request-shutdown slot, unused -- confirm */
! 1910: PHP_MINFO(pcre),
! 1911: NO_VERSION_YET,
! 1912: PHP_MODULE_GLOBALS(pcre),
! 1913: PHP_GINIT(pcre),
! 1914: PHP_GSHUTDOWN(pcre),
! 1915: NULL, /* NOTE(review): presumably the post-deactivate slot, unused -- confirm */
! 1916: STANDARD_MODULE_PROPERTIES_EX
! 1917: };
! 1918: /* ZEND_GET_MODULE(pcre) is emitted only when COMPILE_DL_PCRE is defined. */
! 1919: #ifdef COMPILE_DL_PCRE
! 1920: ZEND_GET_MODULE(pcre)
! 1921: #endif
! 1922:
! 1923: /* }}} */
! 1924:
! 1925: #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
! 1926:
! 1927: /*
! 1928: * Local variables:
! 1929: * tab-width: 4
! 1930: * c-basic-offset: 4
! 1931: * End:
! 1932: * vim600: sw=4 ts=4 fdm=marker
! 1933: * vim<600: sw=4 ts=4
! 1934: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>