Annotation of embedaddon/php/ext/pcre/php_pcre.c, revision 1.1.1.3
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
1.1.1.3 ! misho 5: | Copyright (c) 1997-2013 The PHP Group |
1.1 misho 6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Andrei Zmievski <andrei@php.net> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 misho 19: /* $Id$ */
1.1 misho 20:
21: #include "php.h"
22: #include "php_ini.h"
23: #include "php_globals.h"
24: #include "php_pcre.h"
25: #include "ext/standard/info.h"
26: #include "ext/standard/php_smart_str.h"
27:
28: #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29:
30: #include "ext/standard/php_string.h"
31:
/* Result layout selectors for global matching (low byte of the flags word). */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
/* Bit flag: report (match, offset) pairs instead of plain strings. */
#define PREG_OFFSET_CAPTURE			(1<<8)

/* Bit flags exported to userland for preg_split(). */
#define	PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Internal pattern option, set by the 'e' modifier. */
#define PREG_REPLACE_EVAL			(1<<0)

/* Bit flag exported to userland for preg_grep(). */
#define PREG_GREP_INVERT			(1<<0)

/* Number of entries at which the compiled-pattern cache gets trimmed. */
#define PCRE_CACHE_SIZE 4096

/* Error codes stored in PCRE_G(error_code) and exported as PREG_*_ERROR. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
54:
55:
56: ZEND_DECLARE_MODULE_GLOBALS(pcre)
57:
58:
59: static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60: {
61: int preg_code = 0;
62:
63: switch (pcre_code) {
64: case PCRE_ERROR_MATCHLIMIT:
65: preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66: break;
67:
68: case PCRE_ERROR_RECURSIONLIMIT:
69: preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70: break;
71:
72: case PCRE_ERROR_BADUTF8:
73: preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74: break;
75:
76: case PCRE_ERROR_BADUTF8_OFFSET:
77: preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78: break;
79:
80: default:
81: preg_code = PHP_PCRE_INTERNAL_ERROR;
82: break;
83: }
84:
85: PCRE_G(error_code) = preg_code;
86: }
87: /* }}} */
88:
89: static void php_free_pcre_cache(void *data) /* {{{ */
90: {
91: pcre_cache_entry *pce = (pcre_cache_entry *) data;
92: if (!pce) return;
93: pefree(pce->re, 1);
94: if (pce->extra) pefree(pce->extra, 1);
95: #if HAVE_SETLOCALE
96: if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97: pefree(pce->locale, 1);
98: #endif
99: }
100: /* }}} */
101:
102: static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103: {
104: zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105: pcre_globals->backtrack_limit = 0;
106: pcre_globals->recursion_limit = 0;
107: pcre_globals->error_code = PHP_PCRE_NO_ERROR;
108: }
109: /* }}} */
110:
111: static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112: {
113: zend_hash_destroy(&pcre_globals->pcre_cache);
114: }
115: /* }}} */
116:
117: PHP_INI_BEGIN()
118: STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
119: STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
120: PHP_INI_END()
121:
122:
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emits the module's info table (support flag and PCRE library version)
 * followed by its INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
	php_info_print_table_row(2, "PCRE Library Version", library_version );
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
134:
135: /* {{{ PHP_MINIT_FUNCTION(pcre) */
136: static PHP_MINIT_FUNCTION(pcre)
137: {
138: REGISTER_INI_ENTRIES();
139:
140: REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141: REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142: REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143: REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144: REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145: REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146: REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147:
148: REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149: REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150: REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151: REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
154: REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155:
156: return SUCCESS;
157: }
158: /* }}} */
159:
160: /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
161: static PHP_MSHUTDOWN_FUNCTION(pcre)
162: {
163: UNREGISTER_INI_ENTRIES();
164:
165: return SUCCESS;
166: }
167: /* }}} */
168:
169: /* {{{ static pcre_clean_cache */
170: static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171: {
172: int *num_clean = (int *)arg;
173:
174: if (*num_clean > 0) {
175: (*num_clean)--;
176: return 1;
177: } else {
178: return 0;
179: }
180: }
181: /* }}} */
182:
183: /* {{{ static make_subpats_table */
184: static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185: {
186: pcre_extra *extra = pce->extra;
187: int name_cnt = 0, name_size, ni = 0;
188: int rc;
189: char *name_table;
190: unsigned short name_idx;
191: char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192:
193: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194: if (rc < 0) {
195: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196: efree(subpat_names);
197: return NULL;
198: }
199: if (name_cnt > 0) {
200: int rc1, rc2;
201:
202: rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203: rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204: rc = rc2 ? rc2 : rc1;
205: if (rc < 0) {
206: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207: efree(subpat_names);
208: return NULL;
209: }
210:
211: while (ni++ < name_cnt) {
212: name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213: subpat_names[name_idx] = name_table + 2;
214: if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216: efree(subpat_names);
217: return NULL;
218: }
219: name_table += name_size;
220: }
221: }
222:
223: return subpat_names;
224: }
225: /* }}} */
226:
227: /* {{{ pcre_get_compiled_regex_cache
228: */
229: PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
230: {
231: pcre *re = NULL;
232: pcre_extra *extra;
233: int coptions = 0;
234: int soptions = 0;
235: const char *error;
236: int erroffset;
237: char delimiter;
238: char start_delimiter;
239: char end_delimiter;
240: char *p, *pp;
241: char *pattern;
242: int do_study = 0;
243: int poptions = 0;
244: int count = 0;
245: unsigned const char *tables = NULL;
246: #if HAVE_SETLOCALE
1.1.1.3 ! misho 247: char *locale;
1.1 misho 248: #endif
249: pcre_cache_entry *pce;
250: pcre_cache_entry new_entry;
1.1.1.3 ! misho 251: char *tmp = NULL;
! 252:
! 253: #if HAVE_SETLOCALE
! 254: # if defined(PHP_WIN32) && defined(ZTS)
! 255: _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
! 256: # endif
! 257: locale = setlocale(LC_CTYPE, NULL);
! 258: #endif
1.1 misho 259:
260: /* Try to lookup the cached regex entry, and if successful, just pass
261: back the compiled pattern, otherwise go on and compile it. */
262: if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
263: /*
264: * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
265: * is, we flush it and compile the pattern from scratch.
266: */
267: if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
268: zend_hash_clean(&PCRE_G(pcre_cache));
269: } else {
270: #if HAVE_SETLOCALE
271: if (!strcmp(pce->locale, locale)) {
272: #endif
273: return pce;
274: #if HAVE_SETLOCALE
275: }
276: #endif
277: }
278: }
279:
280: p = regex;
281:
282: /* Parse through the leading whitespace, and display a warning if we
283: get to the end without encountering a delimiter. */
284: while (isspace((int)*(unsigned char *)p)) p++;
285: if (*p == 0) {
1.1.1.3 ! misho 286: php_error_docref(NULL TSRMLS_CC, E_WARNING,
! 287: p < regex + regex_len ? "Null byte in regex" : "Empty regular expression");
1.1 misho 288: return NULL;
289: }
290:
291: /* Get the delimiter and display a warning if it is alphanumeric
292: or a backslash. */
293: delimiter = *p++;
294: if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
295: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
296: return NULL;
297: }
298:
299: start_delimiter = delimiter;
300: if ((pp = strchr("([{< )]}> )]}>", delimiter)))
301: delimiter = pp[5];
302: end_delimiter = delimiter;
303:
1.1.1.3 ! misho 304: pp = p;
! 305:
1.1 misho 306: if (start_delimiter == end_delimiter) {
307: /* We need to iterate through the pattern, searching for the ending delimiter,
308: but skipping the backslashed delimiters. If the ending delimiter is not
309: found, display a warning. */
310: while (*pp != 0) {
311: if (*pp == '\\' && pp[1] != 0) pp++;
312: else if (*pp == delimiter)
313: break;
314: pp++;
315: }
316: } else {
317: /* We iterate through the pattern, searching for the matching ending
318: * delimiter. For each matching starting delimiter, we increment nesting
319: * level, and decrement it for each matching ending delimiter. If we
320: * reach the end of the pattern without matching, display a warning.
321: */
322: int brackets = 1; /* brackets nesting level */
323: while (*pp != 0) {
324: if (*pp == '\\' && pp[1] != 0) pp++;
325: else if (*pp == end_delimiter && --brackets <= 0)
326: break;
327: else if (*pp == start_delimiter)
328: brackets++;
329: pp++;
330: }
1.1.1.3 ! misho 331: }
! 332:
! 333: if (*pp == 0) {
! 334: if (pp < regex + regex_len) {
! 335: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
! 336: } else if (start_delimiter == end_delimiter) {
! 337: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
! 338: } else {
! 339: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
1.1 misho 340: }
1.1.1.3 ! misho 341: return NULL;
1.1 misho 342: }
343:
344: /* Make a copy of the actual pattern. */
345: pattern = estrndup(p, pp-p);
346:
347: /* Move on to the options */
348: pp++;
349:
350: /* Parse through the options, setting appropriate flags. Display
351: a warning if we encounter an unknown modifier. */
1.1.1.3 ! misho 352: while (pp < regex + regex_len) {
1.1 misho 353: switch (*pp++) {
354: /* Perl compatible options */
355: case 'i': coptions |= PCRE_CASELESS; break;
356: case 'm': coptions |= PCRE_MULTILINE; break;
357: case 's': coptions |= PCRE_DOTALL; break;
358: case 'x': coptions |= PCRE_EXTENDED; break;
359:
360: /* PCRE specific options */
361: case 'A': coptions |= PCRE_ANCHORED; break;
362: case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
363: case 'S': do_study = 1; break;
364: case 'U': coptions |= PCRE_UNGREEDY; break;
365: case 'X': coptions |= PCRE_EXTRA; break;
366: case 'u': coptions |= PCRE_UTF8;
367: /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
368: characters, even in UTF-8 mode. However, this can be changed by setting
369: the PCRE_UCP option. */
370: #ifdef PCRE_UCP
371: coptions |= PCRE_UCP;
372: #endif
373: break;
374:
375: /* Custom preg options */
376: case 'e': poptions |= PREG_REPLACE_EVAL; break;
377:
378: case ' ':
379: case '\n':
380: break;
381:
382: default:
1.1.1.3 ! misho 383: if (pp[-1]) {
! 384: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
! 385: } else {
! 386: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
! 387: }
1.1 misho 388: efree(pattern);
389: return NULL;
390: }
391: }
392:
393: #if HAVE_SETLOCALE
394: if (strcmp(locale, "C"))
395: tables = pcre_maketables();
396: #endif
397:
398: /* Compile pattern and display a warning if compilation failed. */
399: re = pcre_compile(pattern,
400: coptions,
401: &error,
402: &erroffset,
403: tables);
404:
405: if (re == NULL) {
406: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
407: efree(pattern);
408: if (tables) {
409: pefree((void*)tables, 1);
410: }
411: return NULL;
412: }
413:
414: /* If study option was specified, study the pattern and
415: store the result in extra for passing to pcre_exec. */
416: if (do_study) {
417: extra = pcre_study(re, soptions, &error);
418: if (extra) {
419: extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
420: }
421: if (error != NULL) {
422: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
423: }
424: } else {
425: extra = NULL;
426: }
427:
428: efree(pattern);
429:
430: /*
431: * If we reached cache limit, clean out the items from the head of the list;
432: * these are supposedly the oldest ones (but not necessarily the least used
433: * ones).
434: */
435: if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
436: int num_clean = PCRE_CACHE_SIZE / 8;
437: zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
438: }
439:
440: /* Store the compiled pattern and extra info in the cache. */
441: new_entry.re = re;
442: new_entry.extra = extra;
443: new_entry.preg_options = poptions;
444: new_entry.compile_options = coptions;
445: #if HAVE_SETLOCALE
446: new_entry.locale = pestrdup(locale, 1);
447: new_entry.tables = tables;
448: #endif
1.1.1.3 ! misho 449:
! 450: /*
! 451: * Interned strings are not duplicated when stored in HashTable,
! 452: * but all the interned strings created during HTTP request are removed
! 453: * at end of request. However PCRE_G(pcre_cache) must be consistent
! 454: * on the next request as well. So we disable usage of interned strings
! 455: * as hash keys especually for this table.
! 456: * See bug #63180
! 457: */
! 458: if (IS_INTERNED(regex)) {
! 459: regex = tmp = estrndup(regex, regex_len);
! 460: }
! 461:
1.1 misho 462: zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
463: sizeof(pcre_cache_entry), (void**)&pce);
464:
1.1.1.3 ! misho 465: if (tmp) {
! 466: efree(tmp);
! 467: }
! 468:
1.1 misho 469: return pce;
470: }
471: /* }}} */
472:
473: /* {{{ pcre_get_compiled_regex
474: */
475: PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
476: {
477: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
478:
479: if (extra) {
480: *extra = pce ? pce->extra : NULL;
481: }
482: if (preg_options) {
483: *preg_options = pce ? pce->preg_options : 0;
484: }
485:
486: return pce ? pce->re : NULL;
487: }
488: /* }}} */
489:
490: /* {{{ pcre_get_compiled_regex_ex
491: */
492: PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
493: {
494: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
495:
496: if (extra) {
497: *extra = pce ? pce->extra : NULL;
498: }
499: if (preg_options) {
500: *preg_options = pce ? pce->preg_options : 0;
501: }
502: if (compile_options) {
503: *compile_options = pce ? pce->compile_options : 0;
504: }
505:
506: return pce ? pce->re : NULL;
507: }
508: /* }}} */
509:
510: /* {{{ add_offset_pair */
511: static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
512: {
513: zval *match_pair;
514:
515: ALLOC_ZVAL(match_pair);
516: array_init(match_pair);
517: INIT_PZVAL(match_pair);
518:
519: /* Add (match, offset) to the return value */
520: add_next_index_stringl(match_pair, str, len, 1);
521: add_next_index_long(match_pair, offset);
522:
523: if (name) {
524: zval_add_ref(&match_pair);
525: zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
526: }
527: zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
528: }
529: /* }}} */
530:
531: static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
532: {
533: /* parameters */
534: char *regex; /* Regular expression */
535: char *subject; /* String to match against */
536: int regex_len;
537: int subject_len;
538: pcre_cache_entry *pce; /* Compiled regular expression */
539: zval *subpats = NULL; /* Array for subpatterns */
540: long flags = 0; /* Match control flags */
541: long start_offset = 0; /* Where the new search starts */
542:
1.1.1.2 misho 543: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len,
1.1 misho 544: &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
545: RETURN_FALSE;
546: }
547:
548: /* Compile regex or get it from cache. */
549: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
550: RETURN_FALSE;
551: }
552:
553: php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
554: global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
555: }
556: /* }}} */
557:
558: /* {{{ php_pcre_match_impl() */
559: PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
560: zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
561: {
562: zval *result_set, /* Holds a set of subpatterns after
563: a global match */
564: **match_sets = NULL; /* An array of sets of matches for each
565: subpattern after a global match */
566: pcre_extra *extra = pce->extra;/* Holds results of studying */
567: pcre_extra extra_data; /* Used locally for exec options */
568: int exoptions = 0; /* Execution options */
569: int count = 0; /* Count of matched subpatterns */
570: int *offsets; /* Array of subpattern offsets */
571: int num_subpats; /* Number of captured subpatterns */
572: int size_offsets; /* Size of the offsets array */
573: int matched; /* Has anything matched */
574: int g_notempty = 0; /* If the match should not be empty */
575: const char **stringlist; /* Holds list of subpatterns */
576: char **subpat_names; /* Array for named subpatterns */
577: int i, rc;
578: int subpats_order; /* Order of subpattern matches */
579: int offset_capture; /* Capture match offsets: yes/no */
580:
581: /* Overwrite the passed-in value for subpatterns with an empty array. */
582: if (subpats != NULL) {
583: zval_dtor(subpats);
584: array_init(subpats);
585: }
586:
587: subpats_order = global ? PREG_PATTERN_ORDER : 0;
588:
589: if (use_flags) {
590: offset_capture = flags & PREG_OFFSET_CAPTURE;
591:
592: /*
593: * subpats_order is pre-set to pattern mode so we change it only if
594: * necessary.
595: */
596: if (flags & 0xff) {
597: subpats_order = flags & 0xff;
598: }
599: if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
600: (!global && subpats_order != 0)) {
601: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
602: return;
603: }
604: } else {
605: offset_capture = 0;
606: }
607:
608: /* Negative offset counts from the end of the string. */
609: if (start_offset < 0) {
610: start_offset = subject_len + start_offset;
611: if (start_offset < 0) {
612: start_offset = 0;
613: }
614: }
615:
616: if (extra == NULL) {
617: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
618: extra = &extra_data;
619: }
620: extra->match_limit = PCRE_G(backtrack_limit);
621: extra->match_limit_recursion = PCRE_G(recursion_limit);
622:
623: /* Calculate the size of the offsets array, and allocate memory for it. */
624: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
625: if (rc < 0) {
626: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
627: RETURN_FALSE;
628: }
629: num_subpats++;
630: size_offsets = num_subpats * 3;
631:
632: /*
633: * Build a mapping from subpattern numbers to their names. We will always
634: * allocate the table, even though there may be no named subpatterns. This
635: * avoids somewhat more complicated logic in the inner loops.
636: */
637: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
638: if (!subpat_names) {
639: RETURN_FALSE;
640: }
641:
642: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
643:
644: /* Allocate match sets array and initialize the values. */
1.1.1.2 misho 645: if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1.1 misho 646: match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
647: for (i=0; i<num_subpats; i++) {
648: ALLOC_ZVAL(match_sets[i]);
649: array_init(match_sets[i]);
650: INIT_PZVAL(match_sets[i]);
651: }
652: }
653:
654: matched = 0;
655: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
656:
657: do {
658: /* Execute the regular expression. */
659: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
660: exoptions|g_notempty, offsets, size_offsets);
661:
662: /* the string was already proved to be valid UTF-8 */
663: exoptions |= PCRE_NO_UTF8_CHECK;
664:
665: /* Check for too many substrings condition. */
666: if (count == 0) {
667: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
668: count = size_offsets/3;
669: }
670:
671: /* If something has matched */
672: if (count > 0) {
673: matched++;
674:
675: /* If subpatterns array has been passed, fill it in with values. */
676: if (subpats != NULL) {
677: /* Try to get the list of substrings and display a warning if failed. */
678: if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
679: efree(subpat_names);
680: efree(offsets);
681: if (match_sets) efree(match_sets);
682: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
683: RETURN_FALSE;
684: }
685:
686: if (global) { /* global pattern matching */
1.1.1.2 misho 687: if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1.1 misho 688: /* For each subpattern, insert it into the appropriate array. */
689: for (i = 0; i < count; i++) {
690: if (offset_capture) {
691: add_offset_pair(match_sets[i], (char *)stringlist[i],
692: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
693: } else {
694: add_next_index_stringl(match_sets[i], (char *)stringlist[i],
695: offsets[(i<<1)+1] - offsets[i<<1], 1);
696: }
697: }
698: /*
699: * If the number of captured subpatterns on this run is
700: * less than the total possible number, pad the result
701: * arrays with empty strings.
702: */
703: if (count < num_subpats) {
704: for (; i < num_subpats; i++) {
705: add_next_index_string(match_sets[i], "", 1);
706: }
707: }
708: } else {
709: /* Allocate the result set array */
710: ALLOC_ZVAL(result_set);
711: array_init(result_set);
712: INIT_PZVAL(result_set);
713:
714: /* Add all the subpatterns to it */
715: for (i = 0; i < count; i++) {
716: if (offset_capture) {
717: add_offset_pair(result_set, (char *)stringlist[i],
718: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
719: } else {
720: if (subpat_names[i]) {
721: add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
722: offsets[(i<<1)+1] - offsets[i<<1], 1);
723: }
724: add_next_index_stringl(result_set, (char *)stringlist[i],
725: offsets[(i<<1)+1] - offsets[i<<1], 1);
726: }
727: }
728: /* And add it to the output array */
729: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
730: }
731: } else { /* single pattern matching */
732: /* For each subpattern, insert it into the subpatterns array. */
733: for (i = 0; i < count; i++) {
734: if (offset_capture) {
735: add_offset_pair(subpats, (char *)stringlist[i],
736: offsets[(i<<1)+1] - offsets[i<<1],
737: offsets[i<<1], subpat_names[i]);
738: } else {
739: if (subpat_names[i]) {
740: add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
741: offsets[(i<<1)+1] - offsets[i<<1], 1);
742: }
743: add_next_index_stringl(subpats, (char *)stringlist[i],
744: offsets[(i<<1)+1] - offsets[i<<1], 1);
745: }
746: }
747: }
748:
749: pcre_free((void *) stringlist);
750: }
751: } else if (count == PCRE_ERROR_NOMATCH) {
752: /* If we previously set PCRE_NOTEMPTY after a null match,
753: this is not necessarily the end. We need to advance
754: the start offset, and continue. Fudge the offset values
755: to achieve this, unless we're already at the end of the string. */
756: if (g_notempty != 0 && start_offset < subject_len) {
757: offsets[0] = start_offset;
758: offsets[1] = start_offset + 1;
759: } else
760: break;
761: } else {
762: pcre_handle_exec_error(count TSRMLS_CC);
763: break;
764: }
765:
766: /* If we have matched an empty string, mimic what Perl's /g options does.
767: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
768: the match again at the same point. If this fails (picked up above) we
769: advance to the next character. */
770: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
771:
772: /* Advance to the position right after the last full match */
773: start_offset = offsets[1];
774: } while (global);
775:
776: /* Add the match sets to the output array and clean up */
1.1.1.2 misho 777: if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1.1 misho 778: for (i = 0; i < num_subpats; i++) {
779: if (subpat_names[i]) {
780: zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
781: strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
782: Z_ADDREF_P(match_sets[i]);
783: }
784: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
785: }
786: efree(match_sets);
787: }
788:
789: efree(offsets);
790: efree(subpat_names);
791:
792: /* Did we encounter an error? */
793: if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
794: RETVAL_LONG(matched);
795: } else {
796: RETVAL_FALSE;
797: }
798: }
799: /* }}} */
800:
801: /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
802: Perform a Perl-style regular expression match */
803: static PHP_FUNCTION(preg_match)
804: {
805: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
806: }
807: /* }}} */
808:
1.1.1.2 misho 809: /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1.1 misho 810: Perform a Perl-style global regular expression match */
811: static PHP_FUNCTION(preg_match_all)
812: {
813: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
814: }
815: /* }}} */
816:
/* {{{ preg_get_backref
 * Try to parse a back-reference at *str, which points at its introducing
 * character. Accepted forms are <char>N, <char>NN and ${N} / ${NN}, where N
 * is a decimal digit; the braced form requires '$'.
 *
 * On success returns 1, stores the number in *backref and advances *str past
 * the reference. On failure returns 0 and leaves *str untouched (*backref
 * may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* A lone introducer at the end of the string is not a reference. */
	if (cursor[1] == 0) {
		return 0;
	}

	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
855:
856: /* {{{ preg_do_repl_func
857: */
858: static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
859: {
860: zval *retval_ptr; /* Function return value */
861: zval **args[1]; /* Argument to pass to function */
862: zval *subpats; /* Captured subpatterns */
863: int result_len; /* Return value length */
864: int i;
865:
866: MAKE_STD_ZVAL(subpats);
867: array_init(subpats);
868: for (i = 0; i < count; i++) {
869: if (subpat_names[i]) {
870: add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
871: }
872: add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
873: }
874: args[0] = &subpats;
875:
876: if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
877: convert_to_string_ex(&retval_ptr);
878: *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
879: result_len = Z_STRLEN_P(retval_ptr);
880: zval_ptr_dtor(&retval_ptr);
881: } else {
882: if (!EG(exception)) {
883: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
884: }
885: result_len = offsets[1] - offsets[0];
886: *result = estrndup(&subject[offsets[0]], result_len);
887: }
888:
889: zval_ptr_dtor(&subpats);
890:
891: return result_len;
892: }
893: /* }}} */
894:
895: /* {{{ preg_do_eval
896: */
897: static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
898: int *offsets, int count, char **result TSRMLS_DC)
899: {
900: zval retval; /* Return value from evaluation */
901: char *eval_str_end, /* End of eval string */
902: *match, /* Current match for a backref */
903: *esc_match, /* Quote-escaped match */
904: *walk, /* Used to walk the code string */
905: *segment, /* Start of segment to append while walking */
906: walk_last; /* Last walked character */
907: int match_len; /* Length of the match */
908: int esc_match_len; /* Length of the quote-escaped match */
909: int result_len; /* Length of the result of the evaluation */
910: int backref; /* Current backref */
911: char *compiled_string_description;
912: smart_str code = {0};
913:
914: eval_str_end = eval_str + eval_str_len;
915: walk = segment = eval_str;
916: walk_last = 0;
917:
918: while (walk < eval_str_end) {
919: /* If found a backreference.. */
920: if ('\\' == *walk || '$' == *walk) {
921: smart_str_appendl(&code, segment, walk - segment);
922: if (walk_last == '\\') {
923: code.c[code.len-1] = *walk++;
924: segment = walk;
925: walk_last = 0;
926: continue;
927: }
928: segment = walk;
929: if (preg_get_backref(&walk, &backref)) {
930: if (backref < count) {
931: /* Find the corresponding string match and substitute it
932: in instead of the backref */
933: match = subject + offsets[backref<<1];
934: match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
935: if (match_len) {
1.1.1.2 misho 936: esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC);
1.1 misho 937: } else {
938: esc_match = match;
939: esc_match_len = 0;
940: }
941: } else {
942: esc_match = "";
943: esc_match_len = 0;
944: }
945: smart_str_appendl(&code, esc_match, esc_match_len);
946:
947: segment = walk;
948:
949: /* Clean up and reassign */
950: if (esc_match_len)
951: efree(esc_match);
952: continue;
953: }
954: }
955: walk++;
956: walk_last = walk[-1];
957: }
958: smart_str_appendl(&code, segment, walk - segment);
959: smart_str_0(&code);
960:
961: compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
962: /* Run the code */
963: if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
964: efree(compiled_string_description);
965: php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
966: /* zend_error() does not return in this case */
967: }
968: efree(compiled_string_description);
969: convert_to_string(&retval);
970:
971: /* Save the return value and its length */
972: *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
973: result_len = Z_STRLEN(retval);
974:
975: /* Clean up */
976: zval_dtor(&retval);
977: smart_str_free(&code);
978:
979: return result_len;
980: }
981: /* }}} */
982:
983: /* {{{ php_pcre_replace
984: */
985: PHPAPI char *php_pcre_replace(char *regex, int regex_len,
986: char *subject, int subject_len,
987: zval *replace_val, int is_callable_replace,
988: int *result_len, int limit, int *replace_count TSRMLS_DC)
989: {
990: pcre_cache_entry *pce; /* Compiled regular expression */
991:
992: /* Compile regex or get it from cache. */
993: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
994: return NULL;
995: }
996:
997: return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
998: is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
999: }
1000: /* }}} */
1001:
1002: /* {{{ php_pcre_replace_impl() */
1003: PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1004: int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
1005: {
1006: pcre_extra *extra = pce->extra;/* Holds results of studying */
1007: pcre_extra extra_data; /* Used locally for exec options */
1008: int exoptions = 0; /* Execution options */
1009: int count = 0; /* Count of matched subpatterns */
1010: int *offsets; /* Array of subpattern offsets */
1011: char **subpat_names; /* Array for named subpatterns */
1012: int num_subpats; /* Number of captured subpatterns */
1013: int size_offsets; /* Size of the offsets array */
1014: int new_len; /* Length of needed storage */
1015: int alloc_len; /* Actual allocated length */
1016: int eval_result_len=0; /* Length of the eval'ed or
1017: function-returned string */
1018: int match_len; /* Length of the current match */
1019: int backref; /* Backreference number */
1020: int eval; /* If the replacement string should be eval'ed */
1021: int start_offset; /* Where the new search starts */
1022: int g_notempty=0; /* If the match should not be empty */
1023: int replace_len=0; /* Length of replacement string */
1024: char *result, /* Result of replacement */
1025: *replace=NULL, /* Replacement string */
1026: *new_buf, /* Temporary buffer for re-allocation */
1027: *walkbuf, /* Location of current replacement in the result */
1028: *walk, /* Used to walk the replacement string */
1029: *match, /* The current match */
1030: *piece, /* The current piece of subject */
1031: *replace_end=NULL, /* End of replacement string */
1032: *eval_result, /* Result of eval or custom function */
1033: walk_last; /* Last walked character */
1034: int rc;
1035:
1036: if (extra == NULL) {
1037: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1038: extra = &extra_data;
1039: }
1040: extra->match_limit = PCRE_G(backtrack_limit);
1041: extra->match_limit_recursion = PCRE_G(recursion_limit);
1042:
1043: eval = pce->preg_options & PREG_REPLACE_EVAL;
1044: if (is_callable_replace) {
1045: if (eval) {
1046: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1047: return NULL;
1048: }
1049: } else {
1050: replace = Z_STRVAL_P(replace_val);
1051: replace_len = Z_STRLEN_P(replace_val);
1052: replace_end = replace + replace_len;
1053: }
1054:
1055: /* Calculate the size of the offsets array, and allocate memory for it. */
1056: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1057: if (rc < 0) {
1058: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1059: return NULL;
1060: }
1061: num_subpats++;
1062: size_offsets = num_subpats * 3;
1063:
1064: /*
1065: * Build a mapping from subpattern numbers to their names. We will always
1066: * allocate the table, even though there may be no named subpatterns. This
1067: * avoids somewhat more complicated logic in the inner loops.
1068: */
1069: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1070: if (!subpat_names) {
1071: return NULL;
1072: }
1073:
1074: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1075:
1076: alloc_len = 2 * subject_len + 1;
1077: result = safe_emalloc(alloc_len, sizeof(char), 0);
1078:
1079: /* Initialize */
1080: match = NULL;
1081: *result_len = 0;
1082: start_offset = 0;
1083: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1084:
1085: while (1) {
1086: /* Execute the regular expression. */
1087: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1088: exoptions|g_notempty, offsets, size_offsets);
1089:
1090: /* the string was already proved to be valid UTF-8 */
1091: exoptions |= PCRE_NO_UTF8_CHECK;
1092:
1093: /* Check for too many substrings condition. */
1094: if (count == 0) {
1095: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1096: count = size_offsets/3;
1097: }
1098:
1099: piece = subject + start_offset;
1100:
1101: if (count > 0 && (limit == -1 || limit > 0)) {
1102: if (replace_count) {
1103: ++*replace_count;
1104: }
1105: /* Set the match location in subject */
1106: match = subject + offsets[0];
1107:
1108: new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1109:
1110: /* If evaluating, do it and add the return string's length */
1111: if (eval) {
1112: eval_result_len = preg_do_eval(replace, replace_len, subject,
1113: offsets, count, &eval_result TSRMLS_CC);
1114: new_len += eval_result_len;
1115: } else if (is_callable_replace) {
1116: /* Use custom function to get replacement string and its length. */
1117: eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1118: new_len += eval_result_len;
1119: } else { /* do regular substitution */
1120: walk = replace;
1121: walk_last = 0;
1122: while (walk < replace_end) {
1123: if ('\\' == *walk || '$' == *walk) {
1124: if (walk_last == '\\') {
1125: walk++;
1126: walk_last = 0;
1127: continue;
1128: }
1129: if (preg_get_backref(&walk, &backref)) {
1130: if (backref < count)
1131: new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1132: continue;
1133: }
1134: }
1135: new_len++;
1136: walk++;
1137: walk_last = walk[-1];
1138: }
1139: }
1140:
1141: if (new_len + 1 > alloc_len) {
1142: alloc_len = 1 + alloc_len + 2 * new_len;
1143: new_buf = emalloc(alloc_len);
1144: memcpy(new_buf, result, *result_len);
1145: efree(result);
1146: result = new_buf;
1147: }
1148: /* copy the part of the string before the match */
1149: memcpy(&result[*result_len], piece, match-piece);
1150: *result_len += match-piece;
1151:
1152: /* copy replacement and backrefs */
1153: walkbuf = result + *result_len;
1154:
1155: /* If evaluating or using custom function, copy result to the buffer
1156: * and clean up. */
1157: if (eval || is_callable_replace) {
1158: memcpy(walkbuf, eval_result, eval_result_len);
1159: *result_len += eval_result_len;
1160: STR_FREE(eval_result);
1161: } else { /* do regular backreference copying */
1162: walk = replace;
1163: walk_last = 0;
1164: while (walk < replace_end) {
1165: if ('\\' == *walk || '$' == *walk) {
1166: if (walk_last == '\\') {
1167: *(walkbuf-1) = *walk++;
1168: walk_last = 0;
1169: continue;
1170: }
1171: if (preg_get_backref(&walk, &backref)) {
1172: if (backref < count) {
1173: match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1174: memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1175: walkbuf += match_len;
1176: }
1177: continue;
1178: }
1179: }
1180: *walkbuf++ = *walk++;
1181: walk_last = walk[-1];
1182: }
1183: *walkbuf = '\0';
1184: /* increment the result length by how much we've added to the string */
1185: *result_len += walkbuf - (result + *result_len);
1186: }
1187:
1188: if (limit != -1)
1189: limit--;
1190:
1191: } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1192: /* If we previously set PCRE_NOTEMPTY after a null match,
1193: this is not necessarily the end. We need to advance
1194: the start offset, and continue. Fudge the offset values
1195: to achieve this, unless we're already at the end of the string. */
1196: if (g_notempty != 0 && start_offset < subject_len) {
1197: offsets[0] = start_offset;
1198: offsets[1] = start_offset + 1;
1199: memcpy(&result[*result_len], piece, 1);
1200: (*result_len)++;
1201: } else {
1202: new_len = *result_len + subject_len - start_offset;
1203: if (new_len + 1 > alloc_len) {
1204: alloc_len = new_len + 1; /* now we know exactly how long it is */
1205: new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1206: memcpy(new_buf, result, *result_len);
1207: efree(result);
1208: result = new_buf;
1209: }
1210: /* stick that last bit of string on our output */
1211: memcpy(&result[*result_len], piece, subject_len - start_offset);
1212: *result_len += subject_len - start_offset;
1213: result[*result_len] = '\0';
1214: break;
1215: }
1216: } else {
1217: pcre_handle_exec_error(count TSRMLS_CC);
1218: efree(result);
1219: result = NULL;
1220: break;
1221: }
1222:
1223: /* If we have matched an empty string, mimic what Perl's /g options does.
1224: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1225: the match again at the same point. If this fails (picked up above) we
1226: advance to the next character. */
1227: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1228:
1229: /* Advance to the next piece. */
1230: start_offset = offsets[1];
1231: }
1232:
1233: efree(offsets);
1234: efree(subpat_names);
1235:
1236: return result;
1237: }
1238: /* }}} */
1239:
1240: /* {{{ php_replace_in_subject
1241: */
1242: static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1243: {
1244: zval **regex_entry,
1245: **replace_entry = NULL,
1246: *replace_value,
1247: empty_replace;
1248: char *subject_value,
1249: *result;
1250: int subject_len;
1251:
1252: /* Make sure we're dealing with strings. */
1253: convert_to_string_ex(subject);
1254: /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1255: ZVAL_STRINGL(&empty_replace, "", 0, 0);
1256:
1257: /* If regex is an array */
1258: if (Z_TYPE_P(regex) == IS_ARRAY) {
1259: /* Duplicate subject string for repeated replacement */
1260: subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1261: subject_len = Z_STRLEN_PP(subject);
1262: *result_len = subject_len;
1263:
1264: zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1265:
1266: replace_value = replace;
1267: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1268: zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1269:
1270: /* For each entry in the regex array, get the entry */
1271: while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1272: /* Make sure we're dealing with strings. */
1273: convert_to_string_ex(regex_entry);
1274:
1275: /* If replace is an array and not a callable construct */
1276: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1277: /* Get current entry */
1278: if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1279: if (!is_callable_replace) {
1280: convert_to_string_ex(replace_entry);
1281: }
1282: replace_value = *replace_entry;
1283: zend_hash_move_forward(Z_ARRVAL_P(replace));
1284: } else {
1285: /* We've run out of replacement strings, so use an empty one */
1286: replace_value = &empty_replace;
1287: }
1288: }
1289:
1290: /* Do the actual replacement and put the result back into subject_value
1291: for further replacements. */
1292: if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1293: Z_STRLEN_PP(regex_entry),
1294: subject_value,
1295: subject_len,
1296: replace_value,
1297: is_callable_replace,
1298: result_len,
1299: limit,
1300: replace_count TSRMLS_CC)) != NULL) {
1301: efree(subject_value);
1302: subject_value = result;
1303: subject_len = *result_len;
1304: } else {
1305: efree(subject_value);
1306: return NULL;
1307: }
1308:
1309: zend_hash_move_forward(Z_ARRVAL_P(regex));
1310: }
1311:
1312: return subject_value;
1313: } else {
1314: result = php_pcre_replace(Z_STRVAL_P(regex),
1315: Z_STRLEN_P(regex),
1316: Z_STRVAL_PP(subject),
1317: Z_STRLEN_PP(subject),
1318: replace,
1319: is_callable_replace,
1320: result_len,
1321: limit,
1322: replace_count TSRMLS_CC);
1323: return result;
1324: }
1325: }
1326: /* }}} */
1327:
1328: /* {{{ preg_replace_impl
1329: */
1330: static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1331: {
1332: zval **regex,
1333: **replace,
1334: **subject,
1335: **subject_entry,
1336: **zcount = NULL;
1337: char *result;
1338: int result_len;
1339: int limit_val = -1;
1340: long limit = -1;
1341: char *string_key;
1342: ulong num_key;
1343: char *callback_name;
1344: int replace_count=0, old_replace_count;
1345:
1346: /* Get function parameters and do error-checking. */
1347: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1348: return;
1349: }
1350:
1351: if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1352: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1353: RETURN_FALSE;
1354: }
1355:
1356: SEPARATE_ZVAL(replace);
1357: if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1358: convert_to_string_ex(replace);
1359: }
1360: if (is_callable_replace) {
1361: if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1362: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1363: efree(callback_name);
1364: MAKE_COPY_ZVAL(subject, return_value);
1365: return;
1366: }
1367: efree(callback_name);
1368: }
1369:
1370: SEPARATE_ZVAL(regex);
1371: SEPARATE_ZVAL(subject);
1372:
1373: if (ZEND_NUM_ARGS() > 3) {
1374: limit_val = limit;
1375: }
1376:
1377: if (Z_TYPE_PP(regex) != IS_ARRAY)
1378: convert_to_string_ex(regex);
1379:
1380: /* if subject is an array */
1381: if (Z_TYPE_PP(subject) == IS_ARRAY) {
1382: array_init(return_value);
1383: zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1384:
1385: /* For each subject entry, convert it to string, then perform replacement
1386: and add the result to the return_value array. */
1387: while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1388: SEPARATE_ZVAL(subject_entry);
1389: old_replace_count = replace_count;
1390: if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1391: if (!is_filter || replace_count > old_replace_count) {
1392: /* Add to return array */
1393: switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1394: {
1395: case HASH_KEY_IS_STRING:
1396: add_assoc_stringl(return_value, string_key, result, result_len, 0);
1397: break;
1398:
1399: case HASH_KEY_IS_LONG:
1400: add_index_stringl(return_value, num_key, result, result_len, 0);
1401: break;
1402: }
1403: } else {
1404: efree(result);
1405: }
1406: }
1407:
1408: zend_hash_move_forward(Z_ARRVAL_PP(subject));
1409: }
1410: } else { /* if subject is not an array */
1411: old_replace_count = replace_count;
1412: if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1413: if (!is_filter || replace_count > old_replace_count) {
1414: RETVAL_STRINGL(result, result_len, 0);
1415: } else {
1416: efree(result);
1417: }
1418: }
1419: }
1420: if (ZEND_NUM_ARGS() > 4) {
1421: zval_dtor(*zcount);
1422: ZVAL_LONG(*zcount, replace_count);
1423: }
1424:
1425: }
1426: /* }}} */
1427:
1428: /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1429: Perform Perl-style regular expression replacement. */
1430: static PHP_FUNCTION(preg_replace)
1431: {
1432: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1433: }
1434: /* }}} */
1435:
1436: /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1437: Perform Perl-style regular expression replacement using replacement callback. */
1438: static PHP_FUNCTION(preg_replace_callback)
1439: {
1440: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1441: }
1442: /* }}} */
1443:
1444: /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1445: Perform Perl-style regular expression replacement and only return matches. */
1446: static PHP_FUNCTION(preg_filter)
1447: {
1448: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1449: }
1450: /* }}} */
1451:
1452: /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1453: Split string into an array using a perl-style regular expression as a delimiter */
1454: static PHP_FUNCTION(preg_split)
1455: {
1456: char *regex; /* Regular expression */
1457: char *subject; /* String to match against */
1458: int regex_len;
1459: int subject_len;
1460: long limit_val = -1;/* Integer value of limit */
1461: long flags = 0; /* Match control flags */
1462: pcre_cache_entry *pce; /* Compiled regular expression */
1463:
1464: /* Get function parameters and do error checking */
1465: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1466: &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1467: RETURN_FALSE;
1468: }
1469:
1470: /* Compile regex or get it from cache. */
1471: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1472: RETURN_FALSE;
1473: }
1474:
1475: php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1476: }
1477: /* }}} */
1478:
1479: /* {{{ php_pcre_split
1480: */
1481: PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1482: long limit_val, long flags TSRMLS_DC)
1483: {
1484: pcre_extra *extra = NULL; /* Holds results of studying */
1485: pcre *re_bump = NULL; /* Regex instance for empty matches */
1486: pcre_extra *extra_bump = NULL; /* Almost dummy */
1487: pcre_extra extra_data; /* Used locally for exec options */
1488: int *offsets; /* Array of subpattern offsets */
1489: int size_offsets; /* Size of the offsets array */
1490: int exoptions = 0; /* Execution options */
1491: int count = 0; /* Count of matched subpatterns */
1492: int start_offset; /* Where the new search starts */
1493: int next_offset; /* End of the last delimiter match + 1 */
1494: int g_notempty = 0; /* If the match should not be empty */
1495: char *last_match; /* Location of last match */
1496: int rc;
1497: int no_empty; /* If NO_EMPTY flag is set */
1498: int delim_capture; /* If delimiters should be captured */
1499: int offset_capture; /* If offsets should be captured */
1500:
1501: no_empty = flags & PREG_SPLIT_NO_EMPTY;
1502: delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1503: offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1504:
1505: if (limit_val == 0) {
1506: limit_val = -1;
1507: }
1508:
1509: if (extra == NULL) {
1510: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1511: extra = &extra_data;
1512: }
1513: extra->match_limit = PCRE_G(backtrack_limit);
1514: extra->match_limit_recursion = PCRE_G(recursion_limit);
1515:
1516: /* Initialize return value */
1517: array_init(return_value);
1518:
1519: /* Calculate the size of the offsets array, and allocate memory for it. */
1520: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1521: if (rc < 0) {
1522: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1523: RETURN_FALSE;
1524: }
1525: size_offsets = (size_offsets + 1) * 3;
1526: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1527:
1528: /* Start at the beginning of the string */
1529: start_offset = 0;
1530: next_offset = 0;
1531: last_match = subject;
1532: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1533:
1534: /* Get next piece if no limit or limit not yet reached and something matched*/
1535: while ((limit_val == -1 || limit_val > 1)) {
1536: count = pcre_exec(pce->re, extra, subject,
1537: subject_len, start_offset,
1538: exoptions|g_notempty, offsets, size_offsets);
1539:
1540: /* the string was already proved to be valid UTF-8 */
1541: exoptions |= PCRE_NO_UTF8_CHECK;
1542:
1543: /* Check for too many substrings condition. */
1544: if (count == 0) {
1545: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1546: count = size_offsets/3;
1547: }
1548:
1549: /* If something matched */
1550: if (count > 0) {
1551: if (!no_empty || &subject[offsets[0]] != last_match) {
1552:
1553: if (offset_capture) {
1554: /* Add (match, offset) pair to the return value */
1555: add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1556: } else {
1557: /* Add the piece to the return value */
1558: add_next_index_stringl(return_value, last_match,
1559: &subject[offsets[0]]-last_match, 1);
1560: }
1561:
1562: /* One less left to do */
1563: if (limit_val != -1)
1564: limit_val--;
1565: }
1566:
1567: last_match = &subject[offsets[1]];
1568: next_offset = offsets[1];
1569:
1570: if (delim_capture) {
1571: int i, match_len;
1572: for (i = 1; i < count; i++) {
1573: match_len = offsets[(i<<1)+1] - offsets[i<<1];
1574: /* If we have matched a delimiter */
1575: if (!no_empty || match_len > 0) {
1576: if (offset_capture) {
1577: add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1578: } else {
1579: add_next_index_stringl(return_value,
1580: &subject[offsets[i<<1]],
1581: match_len, 1);
1582: }
1583: }
1584: }
1585: }
1586: } else if (count == PCRE_ERROR_NOMATCH) {
1587: /* If we previously set PCRE_NOTEMPTY after a null match,
1588: this is not necessarily the end. We need to advance
1589: the start offset, and continue. Fudge the offset values
1590: to achieve this, unless we're already at the end of the string. */
1591: if (g_notempty != 0 && start_offset < subject_len) {
1592: if (pce->compile_options & PCRE_UTF8) {
1593: if (re_bump == NULL) {
1594: int dummy;
1595:
1596: if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1597: RETURN_FALSE;
1598: }
1599: }
1600: count = pcre_exec(re_bump, extra_bump, subject,
1601: subject_len, start_offset,
1602: exoptions, offsets, size_offsets);
1603: if (count < 1) {
1604: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1605: RETURN_FALSE;
1606: }
1607: } else {
1608: offsets[0] = start_offset;
1609: offsets[1] = start_offset + 1;
1610: }
1611: } else
1612: break;
1613: } else {
1614: pcre_handle_exec_error(count TSRMLS_CC);
1615: break;
1616: }
1617:
1618: /* If we have matched an empty string, mimic what Perl's /g options does.
1619: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1620: the match again at the same point. If this fails (picked up above) we
1621: advance to the next character. */
1622: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1623:
1624: /* Advance to the position right after the last full match */
1625: start_offset = offsets[1];
1626: }
1627:
1628:
1629: start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1630:
1631: if (!no_empty || start_offset < subject_len)
1632: {
1633: if (offset_capture) {
1634: /* Add the last (match, offset) pair to the return value */
1635: add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1636: } else {
1637: /* Add the last piece to the return value */
1638: add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1639: }
1640: }
1641:
1642:
1643: /* Clean up */
1644: efree(offsets);
1645: }
1646: /* }}} */
1647:
1648: /* {{{ proto string preg_quote(string str [, string delim_char])
1649: Quote regular expression characters plus an optional character */
1650: static PHP_FUNCTION(preg_quote)
1651: {
1652: int in_str_len;
1653: char *in_str; /* Input string argument */
1654: char *in_str_end; /* End of the input string */
1655: int delim_len = 0;
1656: char *delim = NULL; /* Additional delimiter argument */
1657: char *out_str, /* Output string with quoted characters */
1658: *p, /* Iterator for input string */
1659: *q, /* Iterator for output string */
1660: delim_char=0, /* Delimiter character to be quoted */
1661: c; /* Current character */
1662: zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1663:
1664: /* Get the arguments and check for errors */
1665: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1666: &delim, &delim_len) == FAILURE) {
1667: return;
1668: }
1669:
1670: in_str_end = in_str + in_str_len;
1671:
1672: /* Nothing to do if we got an empty string */
1673: if (in_str == in_str_end) {
1674: RETURN_EMPTY_STRING();
1675: }
1676:
1677: if (delim && *delim) {
1678: delim_char = delim[0];
1679: quote_delim = 1;
1680: }
1681:
1682: /* Allocate enough memory so that even if each character
1683: is quoted, we won't run out of room */
1684: out_str = safe_emalloc(4, in_str_len, 1);
1685:
1686: /* Go through the string and quote necessary characters */
1687: for(p = in_str, q = out_str; p != in_str_end; p++) {
1688: c = *p;
1689: switch(c) {
1690: case '.':
1691: case '\\':
1692: case '+':
1693: case '*':
1694: case '?':
1695: case '[':
1696: case '^':
1697: case ']':
1698: case '$':
1699: case '(':
1700: case ')':
1701: case '{':
1702: case '}':
1703: case '=':
1704: case '!':
1705: case '>':
1706: case '<':
1707: case '|':
1708: case ':':
1709: case '-':
1710: *q++ = '\\';
1711: *q++ = c;
1712: break;
1713:
1714: case '\0':
1715: *q++ = '\\';
1716: *q++ = '0';
1717: *q++ = '0';
1718: *q++ = '0';
1719: break;
1720:
1721: default:
1722: if (quote_delim && c == delim_char)
1723: *q++ = '\\';
1724: *q++ = c;
1725: break;
1726: }
1727: }
1728: *q = '\0';
1729:
1730: /* Reallocate string and return it */
1731: RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1732: }
1733: /* }}} */
1734:
1735: /* {{{ proto array preg_grep(string regex, array input [, int flags])
1736: Searches array and returns entries which match regex */
1737: static PHP_FUNCTION(preg_grep)
1738: {
1739: char *regex; /* Regular expression */
1740: int regex_len;
1741: zval *input; /* Input array */
1742: long flags = 0; /* Match control flags */
1743: pcre_cache_entry *pce; /* Compiled regular expression */
1744:
1745: /* Get arguments and do error checking */
1746: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1747: &input, &flags) == FAILURE) {
1748: return;
1749: }
1750:
1751: /* Compile regex or get it from cache. */
1752: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1753: RETURN_FALSE;
1754: }
1755:
1756: php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1757: }
1758: /* }}} */
1759:
1760: PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1761: {
1762: zval **entry; /* An entry in the input array */
1763: pcre_extra *extra = pce->extra;/* Holds results of studying */
1764: pcre_extra extra_data; /* Used locally for exec options */
1765: int *offsets; /* Array of subpattern offsets */
1766: int size_offsets; /* Size of the offsets array */
1767: int count = 0; /* Count of matched subpatterns */
1768: char *string_key;
1769: ulong num_key;
1770: zend_bool invert; /* Whether to return non-matching
1771: entries */
1772: int rc;
1773:
1774: invert = flags & PREG_GREP_INVERT ? 1 : 0;
1775:
1776: if (extra == NULL) {
1777: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1778: extra = &extra_data;
1779: }
1780: extra->match_limit = PCRE_G(backtrack_limit);
1781: extra->match_limit_recursion = PCRE_G(recursion_limit);
1782:
1783: /* Calculate the size of the offsets array, and allocate memory for it. */
1784: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1785: if (rc < 0) {
1786: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1787: RETURN_FALSE;
1788: }
1789: size_offsets = (size_offsets + 1) * 3;
1790: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1791:
1792: /* Initialize return array */
1793: array_init(return_value);
1794:
1795: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1796:
1797: /* Go through the input array */
1798: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1799: while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1800: zval subject = **entry;
1801:
1802: if (Z_TYPE_PP(entry) != IS_STRING) {
1803: zval_copy_ctor(&subject);
1804: convert_to_string(&subject);
1805: }
1806:
1807: /* Perform the match */
1808: count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1809: Z_STRLEN(subject), 0,
1810: 0, offsets, size_offsets);
1811:
1812: /* Check for too many substrings condition. */
1813: if (count == 0) {
1814: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1815: count = size_offsets/3;
1816: } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1817: pcre_handle_exec_error(count TSRMLS_CC);
1818: break;
1819: }
1820:
1821: /* If the entry fits our requirements */
1822: if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1823:
1824: Z_ADDREF_PP(entry);
1825:
1826: /* Add to return array */
1827: switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1828: {
1829: case HASH_KEY_IS_STRING:
1830: zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1831: strlen(string_key)+1, entry, sizeof(zval *), NULL);
1832: break;
1833:
1834: case HASH_KEY_IS_LONG:
1835: zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1836: sizeof(zval *), NULL);
1837: break;
1838: }
1839: }
1840:
1841: if (Z_TYPE_PP(entry) != IS_STRING) {
1842: zval_dtor(&subject);
1843: }
1844:
1845: zend_hash_move_forward(Z_ARRVAL_P(input));
1846: }
1847: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1848: /* Clean up */
1849: efree(offsets);
1850: }
1851: /* }}} */
1852:
1853: /* {{{ proto int preg_last_error()
1854: Returns the error code of the last regexp execution. */
1855: static PHP_FUNCTION(preg_last_error)
1856: {
1857: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1858: return;
1859: }
1860:
1861: RETURN_LONG(PCRE_G(error_code));
1862: }
1863: /* }}} */
1864:
1865: /* {{{ module definition structures */
1866:
1867: /* {{{ arginfo */
1868: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1869: ZEND_ARG_INFO(0, pattern)
1870: ZEND_ARG_INFO(0, subject)
1871: ZEND_ARG_INFO(1, subpatterns) /* array */
1872: ZEND_ARG_INFO(0, flags)
1873: ZEND_ARG_INFO(0, offset)
1874: ZEND_END_ARG_INFO()
1875:
1.1.1.3 ! misho 1876: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
1.1 misho 1877: ZEND_ARG_INFO(0, pattern)
1878: ZEND_ARG_INFO(0, subject)
1879: ZEND_ARG_INFO(1, subpatterns) /* array */
1880: ZEND_ARG_INFO(0, flags)
1881: ZEND_ARG_INFO(0, offset)
1882: ZEND_END_ARG_INFO()
1883:
1884: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1885: ZEND_ARG_INFO(0, regex)
1886: ZEND_ARG_INFO(0, replace)
1887: ZEND_ARG_INFO(0, subject)
1888: ZEND_ARG_INFO(0, limit)
1889: ZEND_ARG_INFO(1, count)
1890: ZEND_END_ARG_INFO()
1891:
1892: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1893: ZEND_ARG_INFO(0, regex)
1894: ZEND_ARG_INFO(0, callback)
1895: ZEND_ARG_INFO(0, subject)
1896: ZEND_ARG_INFO(0, limit)
1897: ZEND_ARG_INFO(1, count)
1898: ZEND_END_ARG_INFO()
1899:
1900: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1901: ZEND_ARG_INFO(0, pattern)
1902: ZEND_ARG_INFO(0, subject)
1903: ZEND_ARG_INFO(0, limit)
1904: ZEND_ARG_INFO(0, flags)
1905: ZEND_END_ARG_INFO()
1906:
1907: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1908: ZEND_ARG_INFO(0, str)
1909: ZEND_ARG_INFO(0, delim_char)
1910: ZEND_END_ARG_INFO()
1911:
1912: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1913: ZEND_ARG_INFO(0, regex)
1914: ZEND_ARG_INFO(0, input) /* array */
1915: ZEND_ARG_INFO(0, flags)
1916: ZEND_END_ARG_INFO()
1917:
1918: ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
1919: ZEND_END_ARG_INFO()
1920: /* }}} */
1921:
1922: static const zend_function_entry pcre_functions[] = {
1923: PHP_FE(preg_match, arginfo_preg_match)
1924: PHP_FE(preg_match_all, arginfo_preg_match_all)
1925: PHP_FE(preg_replace, arginfo_preg_replace)
1926: PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
1927: PHP_FE(preg_filter, arginfo_preg_replace)
1928: PHP_FE(preg_split, arginfo_preg_split)
1929: PHP_FE(preg_quote, arginfo_preg_quote)
1930: PHP_FE(preg_grep, arginfo_preg_grep)
1931: PHP_FE(preg_last_error, arginfo_preg_last_error)
1932: PHP_FE_END
1933: };
1934:
1935: zend_module_entry pcre_module_entry = {
1936: STANDARD_MODULE_HEADER,
1937: "pcre",
1938: pcre_functions,
1939: PHP_MINIT(pcre),
1940: PHP_MSHUTDOWN(pcre),
1941: NULL,
1942: NULL,
1943: PHP_MINFO(pcre),
1944: NO_VERSION_YET,
1945: PHP_MODULE_GLOBALS(pcre),
1946: PHP_GINIT(pcre),
1947: PHP_GSHUTDOWN(pcre),
1948: NULL,
1949: STANDARD_MODULE_PROPERTIES_EX
1950: };
1951:
/* Only when COMPILE_DL_PCRE is defined: emit the ZEND_GET_MODULE glue for pcre. */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
1955:
1956: /* }}} */
1957:
1958: #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1959:
1960: /*
1961: * Local variables:
1962: * tab-width: 4
1963: * c-basic-offset: 4
1964: * End:
1965: * vim600: sw=4 ts=4 fdm=marker
1966: * vim<600: sw=4 ts=4
1967: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>