Annotation of embedaddon/php/ext/pcre/php_pcre.c, revision 1.1.1.2
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Andrei Zmievski <andrei@php.net> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 ! misho 19: /* $Id$ */
1.1 misho 20:
21: #include "php.h"
22: #include "php_ini.h"
23: #include "php_globals.h"
24: #include "php_pcre.h"
25: #include "ext/standard/info.h"
26: #include "ext/standard/php_smart_str.h"
27:
28: #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29:
30: #include "ext/standard/php_string.h"
31:
/* Result ordering for global matches; carried in the low byte of the flags
   argument (see the `flags & 0xff` handling in php_pcre_match_impl). */
32: #define PREG_PATTERN_ORDER 1
33: #define PREG_SET_ORDER 2
/* Bit flag kept above the low byte so it can be combined with an ordering value. */
34: #define PREG_OFFSET_CAPTURE (1<<8)
35:
/* Bit flags exported to userland as the PREG_SPLIT_* constants. */
36: #define PREG_SPLIT_NO_EMPTY (1<<0)
37: #define PREG_SPLIT_DELIM_CAPTURE (1<<1)
38: #define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
39:
/* Internal option recorded in a cache entry's preg_options when the 'e' modifier is used. */
40: #define PREG_REPLACE_EVAL (1<<0)
41:
/* Bit flag exported to userland as PREG_GREP_INVERT. */
42: #define PREG_GREP_INVERT (1<<0)
43:
/* Number of compiled patterns the cache holds before old entries are evicted. */
44: #define PCRE_CACHE_SIZE 4096
45:
/* Error codes stored in PCRE_G(error_code) and exported to userland as the
   PREG_*_ERROR constants. */
46: enum {
47: PHP_PCRE_NO_ERROR = 0,
48: PHP_PCRE_INTERNAL_ERROR,
49: PHP_PCRE_BACKTRACK_LIMIT_ERROR,
50: PHP_PCRE_RECURSION_LIMIT_ERROR,
51: PHP_PCRE_BAD_UTF8_ERROR,
52: PHP_PCRE_BAD_UTF8_OFFSET_ERROR
53: };
54:
55:
/* Storage for the module globals that the rest of this file reaches through PCRE_G(). */
56: ZEND_DECLARE_MODULE_GLOBALS(pcre)
57:
58:
59: static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60: {
61: int preg_code = 0;
62:
63: switch (pcre_code) {
64: case PCRE_ERROR_MATCHLIMIT:
65: preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66: break;
67:
68: case PCRE_ERROR_RECURSIONLIMIT:
69: preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70: break;
71:
72: case PCRE_ERROR_BADUTF8:
73: preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74: break;
75:
76: case PCRE_ERROR_BADUTF8_OFFSET:
77: preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78: break;
79:
80: default:
81: preg_code = PHP_PCRE_INTERNAL_ERROR;
82: break;
83: }
84:
85: PCRE_G(error_code) = preg_code;
86: }
87: /* }}} */
88:
89: static void php_free_pcre_cache(void *data) /* {{{ */
90: {
91: pcre_cache_entry *pce = (pcre_cache_entry *) data;
92: if (!pce) return;
93: pefree(pce->re, 1);
94: if (pce->extra) pefree(pce->extra, 1);
95: #if HAVE_SETLOCALE
96: if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97: pefree(pce->locale, 1);
98: #endif
99: }
100: /* }}} */
101:
102: static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103: {
104: zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105: pcre_globals->backtrack_limit = 0;
106: pcre_globals->recursion_limit = 0;
107: pcre_globals->error_code = PHP_PCRE_NO_ERROR;
108: }
109: /* }}} */
110:
/* Globals destructor: tears down the compiled-pattern cache. The cache was
   initialised with php_free_pcre_cache as its element destructor, so each
   remaining entry is released through it. */
111: static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112: {
113: zend_hash_destroy(&pcre_globals->pcre_cache);
114: }
115: /* }}} */
116:
/* INI settings of the extension. Both are changeable everywhere (PHP_INI_ALL)
   and are stored as longs in the backtrack_limit / recursion_limit module
   globals; the string literals are their default values. */
117: PHP_INI_BEGIN()
118: STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
119: STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
120: PHP_INI_END()
121:
122:
123: /* {{{ PHP_MINFO_FUNCTION(pcre) */
/* Emits the extension's info table: a fixed "enabled" row, a row with the
   version string reported by pcre_version(), then the INI entries. */
124: static PHP_MINFO_FUNCTION(pcre)
125: {
126: php_info_print_table_start();
127: php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
128: php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
129: php_info_print_table_end();
130:
131: DISPLAY_INI_ENTRIES();
132: }
133: /* }}} */
134:
135: /* {{{ PHP_MINIT_FUNCTION(pcre) */
/* Module startup: registers the INI entries and exports the PREG_* flag and
   error constants plus PCRE_VERSION. All constants are case-sensitive and
   persistent. Always returns SUCCESS. */
136: static PHP_MINIT_FUNCTION(pcre)
137: {
138: REGISTER_INI_ENTRIES();
139:
/* Flags accepted by the matching, splitting and grep functions. */
140: REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141: REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142: REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143: REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144: REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145: REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146: REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147:
/* Error codes, mirroring the PHP_PCRE_*_ERROR enum values. */
148: REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149: REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150: REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151: REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153: REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
/* Version string as reported by the PCRE library itself. */
154: REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155:
156: return SUCCESS;
157: }
158: /* }}} */
159:
160: /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: removes the INI entries registered at startup.
   Always returns SUCCESS. */
161: static PHP_MSHUTDOWN_FUNCTION(pcre)
162: {
163: UNREGISTER_INI_ENTRIES();
164:
165: return SUCCESS;
166: }
167: /* }}} */
168:
169: /* {{{ static pcre_clean_cache */
170: static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171: {
172: int *num_clean = (int *)arg;
173:
174: if (*num_clean > 0) {
175: (*num_clean)--;
176: return 1;
177: } else {
178: return 0;
179: }
180: }
181: /* }}} */
182:
183: /* {{{ static make_subpats_table */
184: static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185: {
186: pcre_extra *extra = pce->extra;
187: int name_cnt = 0, name_size, ni = 0;
188: int rc;
189: char *name_table;
190: unsigned short name_idx;
191: char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192:
193: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194: if (rc < 0) {
195: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196: efree(subpat_names);
197: return NULL;
198: }
199: if (name_cnt > 0) {
200: int rc1, rc2;
201:
202: rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203: rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204: rc = rc2 ? rc2 : rc1;
205: if (rc < 0) {
206: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207: efree(subpat_names);
208: return NULL;
209: }
210:
211: while (ni++ < name_cnt) {
212: name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213: subpat_names[name_idx] = name_table + 2;
214: if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216: efree(subpat_names);
217: return NULL;
218: }
219: name_table += name_size;
220: }
221: }
222:
223: return subpat_names;
224: }
225: /* }}} */
226:
227: /* {{{ pcre_get_compiled_regex_cache
228: */
/* Returns the cache entry for `regex` (a delimited pattern followed by
   optional modifier letters, regex_len bytes long), compiling and caching it
   first when needed.

   A cached entry is reused only if pcre_fullinfo() does not report
   PCRE_ERROR_BADMAGIC for it and, when HAVE_SETLOCALE is set, the locale it
   was compiled under equals the current LC_CTYPE locale.

   Returns NULL after raising an E_WARNING when the regex is empty, the
   delimiter is alphanumeric or a backslash, the closing delimiter is missing,
   an unknown modifier is present, or pcre_compile() fails.

   The returned pointer refers to the copy stored in PCRE_G(pcre_cache). */
229: PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
230: {
231: pcre *re = NULL;
232: pcre_extra *extra;
233: int coptions = 0;
234: int soptions = 0;
235: const char *error;
236: int erroffset;
237: char delimiter;
238: char start_delimiter;
239: char end_delimiter;
240: char *p, *pp;
241: char *pattern;
242: int do_study = 0;
243: int poptions = 0;
244: int count = 0;
245: unsigned const char *tables = NULL;
246: #if HAVE_SETLOCALE
/* Passing NULL only queries the current LC_CTYPE locale name. */
247: char *locale = setlocale(LC_CTYPE, NULL);
248: #endif
249: pcre_cache_entry *pce;
250: pcre_cache_entry new_entry;
251:
252: /* Try to lookup the cached regex entry, and if successful, just pass
253: back the compiled pattern, otherwise go on and compile it. */
254: if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
255: /*
256: * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
257: * is, we flush it and compile the pattern from scratch.
258: */
259: if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
260: zend_hash_clean(&PCRE_G(pcre_cache));
261: } else {
262: #if HAVE_SETLOCALE
/* A locale mismatch falls through to a recompile; the new entry then
   replaces this one under the same key. */
263: if (!strcmp(pce->locale, locale)) {
264: #endif
265: return pce;
266: #if HAVE_SETLOCALE
267: }
268: #endif
269: }
270: }
271:
272: p = regex;
273:
274: /* Parse through the leading whitespace, and display a warning if we
275: get to the end without encountering a delimiter. */
276: while (isspace((int)*(unsigned char *)p)) p++;
277: if (*p == 0) {
278: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
279: return NULL;
280: }
281:
282: /* Get the delimiter and display a warning if it is alphanumeric
283: or a backslash. */
284: delimiter = *p++;
285: if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
286: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
287: return NULL;
288: }
289:
290: start_delimiter = delimiter;
/* The lookup string is laid out so that the character five positions after
   the first occurrence of any bracket (opening or closing) is the matching
   closing bracket. */
291: if ((pp = strchr("([{< )]}> )]}>", delimiter)))
292: delimiter = pp[5];
293: end_delimiter = delimiter;
294:
295: if (start_delimiter == end_delimiter) {
296: /* We need to iterate through the pattern, searching for the ending delimiter,
297: but skipping the backslashed delimiters. If the ending delimiter is not
298: found, display a warning. */
299: pp = p;
300: while (*pp != 0) {
301: if (*pp == '\\' && pp[1] != 0) pp++;
302: else if (*pp == delimiter)
303: break;
304: pp++;
305: }
306: if (*pp == 0) {
307: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
308: return NULL;
309: }
310: } else {
311: /* We iterate through the pattern, searching for the matching ending
312: * delimiter. For each matching starting delimiter, we increment nesting
313: * level, and decrement it for each matching ending delimiter. If we
314: * reach the end of the pattern without matching, display a warning.
315: */
316: int brackets = 1; /* brackets nesting level */
317: pp = p;
318: while (*pp != 0) {
319: if (*pp == '\\' && pp[1] != 0) pp++;
320: else if (*pp == end_delimiter && --brackets <= 0)
321: break;
322: else if (*pp == start_delimiter)
323: brackets++;
324: pp++;
325: }
326: if (*pp == 0) {
327: php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
328: return NULL;
329: }
330: }
331:
332: /* Make a copy of the actual pattern. */
/* p is just past the opening delimiter and pp is on the closing one, so the
   copy excludes both. It must be efree()d on every exit path below. */
333: pattern = estrndup(p, pp-p);
334:
335: /* Move on to the options */
336: pp++;
337:
338: /* Parse through the options, setting appropriate flags. Display
339: a warning if we encounter an unknown modifier. */
340: while (*pp != 0) {
341: switch (*pp++) {
342: /* Perl compatible options */
343: case 'i': coptions |= PCRE_CASELESS; break;
344: case 'm': coptions |= PCRE_MULTILINE; break;
345: case 's': coptions |= PCRE_DOTALL; break;
346: case 'x': coptions |= PCRE_EXTENDED; break;
347:
348: /* PCRE specific options */
349: case 'A': coptions |= PCRE_ANCHORED; break;
350: case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
351: case 'S': do_study = 1; break;
352: case 'U': coptions |= PCRE_UNGREEDY; break;
353: case 'X': coptions |= PCRE_EXTRA; break;
354: case 'u': coptions |= PCRE_UTF8;
355: /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
356: characters, even in UTF-8 mode. However, this can be changed by setting
357: the PCRE_UCP option. */
358: #ifdef PCRE_UCP
359: coptions |= PCRE_UCP;
360: #endif
361: break;
362:
363: /* Custom preg options */
364: case 'e': poptions |= PREG_REPLACE_EVAL; break;
365:
/* Spaces and newlines among the modifiers are silently ignored. */
366: case ' ':
367: case '\n':
368: break;
369:
370: default:
371: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
372: efree(pattern);
373: return NULL;
374: }
375: }
376:
377: #if HAVE_SETLOCALE
/* Locale-specific character tables are built only outside the "C" locale;
   otherwise tables stays NULL and is passed to pcre_compile() as such. */
378: if (strcmp(locale, "C"))
379: tables = pcre_maketables();
380: #endif
381:
382: /* Compile pattern and display a warning if compilation failed. */
383: re = pcre_compile(pattern,
384: coptions,
385: &error,
386: &erroffset,
387: tables);
388:
389: if (re == NULL) {
390: php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
391: efree(pattern);
392: if (tables) {
393: pefree((void*)tables, 1);
394: }
395: return NULL;
396: }
397:
398: /* If study option was specified, study the pattern and
399: store the result in extra for passing to pcre_exec. */
400: if (do_study) {
401: extra = pcre_study(re, soptions, &error);
402: if (extra) {
/* Mark the limit fields as in use; the matching code writes the actual limit
   values into them before each run. */
403: extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
404: }
/* A study failure only raises a warning; the entry is still cached. */
405: if (error != NULL) {
406: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
407: }
408: } else {
409: extra = NULL;
410: }
411:
412: efree(pattern);
413:
414: /*
415: * If we reached cache limit, clean out the items from the head of the list;
416: * these are supposedly the oldest ones (but not necessarily the least used
417: * ones).
418: */
419: if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
420: int num_clean = PCRE_CACHE_SIZE / 8;
421: zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
422: }
423:
424: /* Store the compiled pattern and extra info in the cache. */
425: new_entry.re = re;
426: new_entry.extra = extra;
427: new_entry.preg_options = poptions;
428: new_entry.compile_options = coptions;
429: #if HAVE_SETLOCALE
430: new_entry.locale = pestrdup(locale, 1);
431: new_entry.tables = tables;
432: #endif
/* The hash stores a copy of new_entry; pce is pointed at that stored copy. */
433: zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
434: sizeof(pcre_cache_entry), (void**)&pce);
435:
436: return pce;
437: }
438: /* }}} */
439:
440: /* {{{ pcre_get_compiled_regex
441: */
442: PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
443: {
444: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
445:
446: if (extra) {
447: *extra = pce ? pce->extra : NULL;
448: }
449: if (preg_options) {
450: *preg_options = pce ? pce->preg_options : 0;
451: }
452:
453: return pce ? pce->re : NULL;
454: }
455: /* }}} */
456:
457: /* {{{ pcre_get_compiled_regex_ex
458: */
459: PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
460: {
461: pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
462:
463: if (extra) {
464: *extra = pce ? pce->extra : NULL;
465: }
466: if (preg_options) {
467: *preg_options = pce ? pce->preg_options : 0;
468: }
469: if (compile_options) {
470: *compile_options = pce ? pce->compile_options : 0;
471: }
472:
473: return pce ? pce->re : NULL;
474: }
475: /* }}} */
476:
477: /* {{{ add_offset_pair */
478: static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
479: {
480: zval *match_pair;
481:
482: ALLOC_ZVAL(match_pair);
483: array_init(match_pair);
484: INIT_PZVAL(match_pair);
485:
486: /* Add (match, offset) to the return value */
487: add_next_index_stringl(match_pair, str, len, 1);
488: add_next_index_long(match_pair, offset);
489:
490: if (name) {
491: zval_add_ref(&match_pair);
492: zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
493: }
494: zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
495: }
496: /* }}} */
497:
498: static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
499: {
500: /* parameters */
501: char *regex; /* Regular expression */
502: char *subject; /* String to match against */
503: int regex_len;
504: int subject_len;
505: pcre_cache_entry *pce; /* Compiled regular expression */
506: zval *subpats = NULL; /* Array for subpatterns */
507: long flags = 0; /* Match control flags */
508: long start_offset = 0; /* Where the new search starts */
509:
1.1.1.2 ! misho 510: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len,
1.1 misho 511: &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
512: RETURN_FALSE;
513: }
514:
515: /* Compile regex or get it from cache. */
516: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
517: RETURN_FALSE;
518: }
519:
520: php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
521: global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
522: }
523: /* }}} */
524:
525: /* {{{ php_pcre_match_impl() */
526: PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
527: zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
528: {
529: zval *result_set, /* Holds a set of subpatterns after
530: a global match */
531: **match_sets = NULL; /* An array of sets of matches for each
532: subpattern after a global match */
533: pcre_extra *extra = pce->extra;/* Holds results of studying */
534: pcre_extra extra_data; /* Used locally for exec options */
535: int exoptions = 0; /* Execution options */
536: int count = 0; /* Count of matched subpatterns */
537: int *offsets; /* Array of subpattern offsets */
538: int num_subpats; /* Number of captured subpatterns */
539: int size_offsets; /* Size of the offsets array */
540: int matched; /* Has anything matched */
541: int g_notempty = 0; /* If the match should not be empty */
542: const char **stringlist; /* Holds list of subpatterns */
543: char **subpat_names; /* Array for named subpatterns */
544: int i, rc;
545: int subpats_order; /* Order of subpattern matches */
546: int offset_capture; /* Capture match offsets: yes/no */
547:
548: /* Overwrite the passed-in value for subpatterns with an empty array. */
549: if (subpats != NULL) {
550: zval_dtor(subpats);
551: array_init(subpats);
552: }
553:
554: subpats_order = global ? PREG_PATTERN_ORDER : 0;
555:
556: if (use_flags) {
557: offset_capture = flags & PREG_OFFSET_CAPTURE;
558:
559: /*
560: * subpats_order is pre-set to pattern mode so we change it only if
561: * necessary.
562: */
563: if (flags & 0xff) {
564: subpats_order = flags & 0xff;
565: }
566: if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
567: (!global && subpats_order != 0)) {
568: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
569: return;
570: }
571: } else {
572: offset_capture = 0;
573: }
574:
575: /* Negative offset counts from the end of the string. */
576: if (start_offset < 0) {
577: start_offset = subject_len + start_offset;
578: if (start_offset < 0) {
579: start_offset = 0;
580: }
581: }
582:
583: if (extra == NULL) {
584: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
585: extra = &extra_data;
586: }
587: extra->match_limit = PCRE_G(backtrack_limit);
588: extra->match_limit_recursion = PCRE_G(recursion_limit);
589:
590: /* Calculate the size of the offsets array, and allocate memory for it. */
591: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
592: if (rc < 0) {
593: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
594: RETURN_FALSE;
595: }
596: num_subpats++;
597: size_offsets = num_subpats * 3;
598:
599: /*
600: * Build a mapping from subpattern numbers to their names. We will always
601: * allocate the table, even though there may be no named subpatterns. This
602: * avoids somewhat more complicated logic in the inner loops.
603: */
604: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
605: if (!subpat_names) {
606: RETURN_FALSE;
607: }
608:
609: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
610:
611: /* Allocate match sets array and initialize the values. */
1.1.1.2 ! misho 612: if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1.1 misho 613: match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
614: for (i=0; i<num_subpats; i++) {
615: ALLOC_ZVAL(match_sets[i]);
616: array_init(match_sets[i]);
617: INIT_PZVAL(match_sets[i]);
618: }
619: }
620:
621: matched = 0;
622: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
623:
624: do {
625: /* Execute the regular expression. */
626: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
627: exoptions|g_notempty, offsets, size_offsets);
628:
629: /* the string was already proved to be valid UTF-8 */
630: exoptions |= PCRE_NO_UTF8_CHECK;
631:
632: /* Check for too many substrings condition. */
633: if (count == 0) {
634: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
635: count = size_offsets/3;
636: }
637:
638: /* If something has matched */
639: if (count > 0) {
640: matched++;
641:
642: /* If subpatterns array has been passed, fill it in with values. */
643: if (subpats != NULL) {
644: /* Try to get the list of substrings and display a warning if failed. */
645: if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
646: efree(subpat_names);
647: efree(offsets);
648: if (match_sets) efree(match_sets);
649: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
650: RETURN_FALSE;
651: }
652:
653: if (global) { /* global pattern matching */
1.1.1.2 ! misho 654: if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1.1 misho 655: /* For each subpattern, insert it into the appropriate array. */
656: for (i = 0; i < count; i++) {
657: if (offset_capture) {
658: add_offset_pair(match_sets[i], (char *)stringlist[i],
659: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
660: } else {
661: add_next_index_stringl(match_sets[i], (char *)stringlist[i],
662: offsets[(i<<1)+1] - offsets[i<<1], 1);
663: }
664: }
665: /*
666: * If the number of captured subpatterns on this run is
667: * less than the total possible number, pad the result
668: * arrays with empty strings.
669: */
670: if (count < num_subpats) {
671: for (; i < num_subpats; i++) {
672: add_next_index_string(match_sets[i], "", 1);
673: }
674: }
675: } else {
676: /* Allocate the result set array */
677: ALLOC_ZVAL(result_set);
678: array_init(result_set);
679: INIT_PZVAL(result_set);
680:
681: /* Add all the subpatterns to it */
682: for (i = 0; i < count; i++) {
683: if (offset_capture) {
684: add_offset_pair(result_set, (char *)stringlist[i],
685: offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
686: } else {
687: if (subpat_names[i]) {
688: add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
689: offsets[(i<<1)+1] - offsets[i<<1], 1);
690: }
691: add_next_index_stringl(result_set, (char *)stringlist[i],
692: offsets[(i<<1)+1] - offsets[i<<1], 1);
693: }
694: }
695: /* And add it to the output array */
696: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
697: }
698: } else { /* single pattern matching */
699: /* For each subpattern, insert it into the subpatterns array. */
700: for (i = 0; i < count; i++) {
701: if (offset_capture) {
702: add_offset_pair(subpats, (char *)stringlist[i],
703: offsets[(i<<1)+1] - offsets[i<<1],
704: offsets[i<<1], subpat_names[i]);
705: } else {
706: if (subpat_names[i]) {
707: add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
708: offsets[(i<<1)+1] - offsets[i<<1], 1);
709: }
710: add_next_index_stringl(subpats, (char *)stringlist[i],
711: offsets[(i<<1)+1] - offsets[i<<1], 1);
712: }
713: }
714: }
715:
716: pcre_free((void *) stringlist);
717: }
718: } else if (count == PCRE_ERROR_NOMATCH) {
719: /* If we previously set PCRE_NOTEMPTY after a null match,
720: this is not necessarily the end. We need to advance
721: the start offset, and continue. Fudge the offset values
722: to achieve this, unless we're already at the end of the string. */
723: if (g_notempty != 0 && start_offset < subject_len) {
724: offsets[0] = start_offset;
725: offsets[1] = start_offset + 1;
726: } else
727: break;
728: } else {
729: pcre_handle_exec_error(count TSRMLS_CC);
730: break;
731: }
732:
733: /* If we have matched an empty string, mimic what Perl's /g options does.
734: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
735: the match again at the same point. If this fails (picked up above) we
736: advance to the next character. */
737: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
738:
739: /* Advance to the position right after the last full match */
740: start_offset = offsets[1];
741: } while (global);
742:
743: /* Add the match sets to the output array and clean up */
1.1.1.2 ! misho 744: if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1.1 misho 745: for (i = 0; i < num_subpats; i++) {
746: if (subpat_names[i]) {
747: zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
748: strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
749: Z_ADDREF_P(match_sets[i]);
750: }
751: zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
752: }
753: efree(match_sets);
754: }
755:
756: efree(offsets);
757: efree(subpat_names);
758:
759: /* Did we encounter an error? */
760: if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
761: RETVAL_LONG(matched);
762: } else {
763: RETVAL_FALSE;
764: }
765: }
766: /* }}} */
767:
768: /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
769: Perform a Perl-style regular expression match */
770: static PHP_FUNCTION(preg_match)
771: {
/* global = 0: a single match attempt. */
772: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
773: }
774: /* }}} */
775:
1.1.1.2 ! misho 776: /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1.1 misho 777: Perform a Perl-style global regular expression match */
778: static PHP_FUNCTION(preg_match_all)
779: {
/* global = 1: keep matching until no further match is found. */
780: php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
781: }
782: /* }}} */
783:
/* {{{ preg_get_backref
 * Parse a one- or two-digit backreference at *str. The first character is
 * the introducer; the accepted forms are <introducer>N, <introducer>NN and
 * the braced ${N} / ${NN}.
 *
 * On success, stores the number in *backref, advances *str past the
 * reference and returns 1. On failure returns 0 and leaves *str untouched
 * (*backref may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int   braced;

	/* An introducer that is the last character cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Skip the introducer, and the opening brace of the ${...} form. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
822:
823: /* {{{ preg_do_repl_func
824: */
825: static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
826: {
827: zval *retval_ptr; /* Function return value */
828: zval **args[1]; /* Argument to pass to function */
829: zval *subpats; /* Captured subpatterns */
830: int result_len; /* Return value length */
831: int i;
832:
833: MAKE_STD_ZVAL(subpats);
834: array_init(subpats);
835: for (i = 0; i < count; i++) {
836: if (subpat_names[i]) {
837: add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
838: }
839: add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
840: }
841: args[0] = &subpats;
842:
843: if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
844: convert_to_string_ex(&retval_ptr);
845: *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
846: result_len = Z_STRLEN_P(retval_ptr);
847: zval_ptr_dtor(&retval_ptr);
848: } else {
849: if (!EG(exception)) {
850: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
851: }
852: result_len = offsets[1] - offsets[0];
853: *result = estrndup(&subject[offsets[0]], result_len);
854: }
855:
856: zval_ptr_dtor(&subpats);
857:
858: return result_len;
859: }
860: /* }}} */
861:
862: /* {{{ preg_do_eval
863: */
/* Implements the 'e' (PREG_REPLACE_EVAL) replacement: builds a code string
   from eval_str in which every backreference recognised by preg_get_backref()
   is replaced by the addslashes()-escaped text of that capture (or by nothing
   if the group number is not below `count`), evaluates the code, converts the
   result to a string and returns a copy of it in *result. The return value is
   the length of *result.

   NOTE(review): the code string is assembled from subject text and then
   evaluated; addslashes() is the only escaping applied here. */
864: static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
865: int *offsets, int count, char **result TSRMLS_DC)
866: {
867: zval retval; /* Return value from evaluation */
868: char *eval_str_end, /* End of eval string */
869: *match, /* Current match for a backref */
870: *esc_match, /* Quote-escaped match */
871: *walk, /* Used to walk the code string */
872: *segment, /* Start of segment to append while walking */
873: walk_last; /* Last walked character */
874: int match_len; /* Length of the match */
875: int esc_match_len; /* Length of the quote-escaped match */
876: int result_len; /* Length of the result of the evaluation */
877: int backref; /* Current backref */
878: char *compiled_string_description;
879: smart_str code = {0};
880:
881: eval_str_end = eval_str + eval_str_len;
882: walk = segment = eval_str;
883: walk_last = 0;
884:
885: while (walk < eval_str_end) {
886: /* If found a backreference.. */
887: if ('\\' == *walk || '$' == *walk) {
/* Flush the literal text collected since the last substitution. */
888: smart_str_appendl(&code, segment, walk - segment);
/* Preceded by a backslash: the character is escaped, so overwrite the
   backslash that was just flushed with the character itself. */
889: if (walk_last == '\\') {
890: code.c[code.len-1] = *walk++;
891: segment = walk;
892: walk_last = 0;
893: continue;
894: }
895: segment = walk;
896: if (preg_get_backref(&walk, &backref)) {
897: if (backref < count) {
898: /* Find the corresponding string match and substitute it
899: in instead of the backref */
900: match = subject + offsets[backref<<1];
901: match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
902: if (match_len) {
1.1.1.2 ! misho 903: esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC);
1.1 misho 904: } else {
905: esc_match = match;
906: esc_match_len = 0;
907: }
908: } else {
909: esc_match = "";
910: esc_match_len = 0;
911: }
912: smart_str_appendl(&code, esc_match, esc_match_len);
913:
914: segment = walk;
915:
916: /* Clean up and reassign */
/* The other branches above leave esc_match_len at 0, so only the
   php_addslashes() result is freed here. */
917: if (esc_match_len)
918: efree(esc_match);
919: continue;
920: }
921: }
/* Ordinary character, or an introducer that did not form a backreference:
   leave it in the pending segment and remember it for the escape check. */
922: walk++;
923: walk_last = walk[-1];
924: }
/* Flush the trailing literal text and NUL-terminate the code string. */
925: smart_str_appendl(&code, segment, walk - segment);
926: smart_str_0(&code);
927:
928: compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
929: /* Run the code */
930: if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
931: efree(compiled_string_description);
932: php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
933: /* zend_error() does not return in this case */
934: }
935: efree(compiled_string_description);
936: convert_to_string(&retval);
937:
938: /* Save the return value and its length */
939: *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
940: result_len = Z_STRLEN(retval);
941:
942: /* Clean up */
943: zval_dtor(&retval);
944: smart_str_free(&code);
945:
946: return result_len;
947: }
948: /* }}} */
949:
950: /* {{{ php_pcre_replace
951: */
952: PHPAPI char *php_pcre_replace(char *regex, int regex_len,
953: char *subject, int subject_len,
954: zval *replace_val, int is_callable_replace,
955: int *result_len, int limit, int *replace_count TSRMLS_DC)
956: {
957: pcre_cache_entry *pce; /* Compiled regular expression */
958:
959: /* Compile regex or get it from cache. */
960: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
961: return NULL;
962: }
963:
964: return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
965: is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
966: }
967: /* }}} */
968:
969: /* {{{ php_pcre_replace_impl() */
970: PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
971: int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
972: {
973: pcre_extra *extra = pce->extra;/* Holds results of studying */
974: pcre_extra extra_data; /* Used locally for exec options */
975: int exoptions = 0; /* Execution options */
976: int count = 0; /* Count of matched subpatterns */
977: int *offsets; /* Array of subpattern offsets */
978: char **subpat_names; /* Array for named subpatterns */
979: int num_subpats; /* Number of captured subpatterns */
980: int size_offsets; /* Size of the offsets array */
981: int new_len; /* Length of needed storage */
982: int alloc_len; /* Actual allocated length */
983: int eval_result_len=0; /* Length of the eval'ed or
984: function-returned string */
985: int match_len; /* Length of the current match */
986: int backref; /* Backreference number */
987: int eval; /* If the replacement string should be eval'ed */
988: int start_offset; /* Where the new search starts */
989: int g_notempty=0; /* If the match should not be empty */
990: int replace_len=0; /* Length of replacement string */
991: char *result, /* Result of replacement */
992: *replace=NULL, /* Replacement string */
993: *new_buf, /* Temporary buffer for re-allocation */
994: *walkbuf, /* Location of current replacement in the result */
995: *walk, /* Used to walk the replacement string */
996: *match, /* The current match */
997: *piece, /* The current piece of subject */
998: *replace_end=NULL, /* End of replacement string */
999: *eval_result, /* Result of eval or custom function */
1000: walk_last; /* Last walked character */
1001: int rc;
1002:
1003: if (extra == NULL) {
1004: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1005: extra = &extra_data;
1006: }
1007: extra->match_limit = PCRE_G(backtrack_limit);
1008: extra->match_limit_recursion = PCRE_G(recursion_limit);
1009:
1010: eval = pce->preg_options & PREG_REPLACE_EVAL;
1011: if (is_callable_replace) {
1012: if (eval) {
1013: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1014: return NULL;
1015: }
1016: } else {
1017: replace = Z_STRVAL_P(replace_val);
1018: replace_len = Z_STRLEN_P(replace_val);
1019: replace_end = replace + replace_len;
1020: }
1021:
1022: /* Calculate the size of the offsets array, and allocate memory for it. */
1023: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1024: if (rc < 0) {
1025: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1026: return NULL;
1027: }
1028: num_subpats++;
1029: size_offsets = num_subpats * 3;
1030:
1031: /*
1032: * Build a mapping from subpattern numbers to their names. We will always
1033: * allocate the table, even though there may be no named subpatterns. This
1034: * avoids somewhat more complicated logic in the inner loops.
1035: */
1036: subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1037: if (!subpat_names) {
1038: return NULL;
1039: }
1040:
1041: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1042:
1043: alloc_len = 2 * subject_len + 1;
1044: result = safe_emalloc(alloc_len, sizeof(char), 0);
1045:
1046: /* Initialize */
1047: match = NULL;
1048: *result_len = 0;
1049: start_offset = 0;
1050: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1051:
1052: while (1) {
1053: /* Execute the regular expression. */
1054: count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1055: exoptions|g_notempty, offsets, size_offsets);
1056:
1057: /* the string was already proved to be valid UTF-8 */
1058: exoptions |= PCRE_NO_UTF8_CHECK;
1059:
1060: /* Check for too many substrings condition. */
1061: if (count == 0) {
1062: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1063: count = size_offsets/3;
1064: }
1065:
1066: piece = subject + start_offset;
1067:
1068: if (count > 0 && (limit == -1 || limit > 0)) {
1069: if (replace_count) {
1070: ++*replace_count;
1071: }
1072: /* Set the match location in subject */
1073: match = subject + offsets[0];
1074:
1075: new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1076:
1077: /* If evaluating, do it and add the return string's length */
1078: if (eval) {
1079: eval_result_len = preg_do_eval(replace, replace_len, subject,
1080: offsets, count, &eval_result TSRMLS_CC);
1081: new_len += eval_result_len;
1082: } else if (is_callable_replace) {
1083: /* Use custom function to get replacement string and its length. */
1084: eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1085: new_len += eval_result_len;
1086: } else { /* do regular substitution */
1087: walk = replace;
1088: walk_last = 0;
1089: while (walk < replace_end) {
1090: if ('\\' == *walk || '$' == *walk) {
1091: if (walk_last == '\\') {
1092: walk++;
1093: walk_last = 0;
1094: continue;
1095: }
1096: if (preg_get_backref(&walk, &backref)) {
1097: if (backref < count)
1098: new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1099: continue;
1100: }
1101: }
1102: new_len++;
1103: walk++;
1104: walk_last = walk[-1];
1105: }
1106: }
1107:
1108: if (new_len + 1 > alloc_len) {
1109: alloc_len = 1 + alloc_len + 2 * new_len;
1110: new_buf = emalloc(alloc_len);
1111: memcpy(new_buf, result, *result_len);
1112: efree(result);
1113: result = new_buf;
1114: }
1115: /* copy the part of the string before the match */
1116: memcpy(&result[*result_len], piece, match-piece);
1117: *result_len += match-piece;
1118:
1119: /* copy replacement and backrefs */
1120: walkbuf = result + *result_len;
1121:
1122: /* If evaluating or using custom function, copy result to the buffer
1123: * and clean up. */
1124: if (eval || is_callable_replace) {
1125: memcpy(walkbuf, eval_result, eval_result_len);
1126: *result_len += eval_result_len;
1127: STR_FREE(eval_result);
1128: } else { /* do regular backreference copying */
1129: walk = replace;
1130: walk_last = 0;
1131: while (walk < replace_end) {
1132: if ('\\' == *walk || '$' == *walk) {
1133: if (walk_last == '\\') {
1134: *(walkbuf-1) = *walk++;
1135: walk_last = 0;
1136: continue;
1137: }
1138: if (preg_get_backref(&walk, &backref)) {
1139: if (backref < count) {
1140: match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1141: memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1142: walkbuf += match_len;
1143: }
1144: continue;
1145: }
1146: }
1147: *walkbuf++ = *walk++;
1148: walk_last = walk[-1];
1149: }
1150: *walkbuf = '\0';
1151: /* increment the result length by how much we've added to the string */
1152: *result_len += walkbuf - (result + *result_len);
1153: }
1154:
1155: if (limit != -1)
1156: limit--;
1157:
1158: } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1159: /* If we previously set PCRE_NOTEMPTY after a null match,
1160: this is not necessarily the end. We need to advance
1161: the start offset, and continue. Fudge the offset values
1162: to achieve this, unless we're already at the end of the string. */
1163: if (g_notempty != 0 && start_offset < subject_len) {
1164: offsets[0] = start_offset;
1165: offsets[1] = start_offset + 1;
1166: memcpy(&result[*result_len], piece, 1);
1167: (*result_len)++;
1168: } else {
1169: new_len = *result_len + subject_len - start_offset;
1170: if (new_len + 1 > alloc_len) {
1171: alloc_len = new_len + 1; /* now we know exactly how long it is */
1172: new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1173: memcpy(new_buf, result, *result_len);
1174: efree(result);
1175: result = new_buf;
1176: }
1177: /* stick that last bit of string on our output */
1178: memcpy(&result[*result_len], piece, subject_len - start_offset);
1179: *result_len += subject_len - start_offset;
1180: result[*result_len] = '\0';
1181: break;
1182: }
1183: } else {
1184: pcre_handle_exec_error(count TSRMLS_CC);
1185: efree(result);
1186: result = NULL;
1187: break;
1188: }
1189:
1190: /* If we have matched an empty string, mimic what Perl's /g options does.
1191: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1192: the match again at the same point. If this fails (picked up above) we
1193: advance to the next character. */
1194: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1195:
1196: /* Advance to the next piece. */
1197: start_offset = offsets[1];
1198: }
1199:
1200: efree(offsets);
1201: efree(subpat_names);
1202:
1203: return result;
1204: }
1205: /* }}} */
1206:
1207: /* {{{ php_replace_in_subject
1208: */
1209: static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1210: {
1211: zval **regex_entry,
1212: **replace_entry = NULL,
1213: *replace_value,
1214: empty_replace;
1215: char *subject_value,
1216: *result;
1217: int subject_len;
1218:
1219: /* Make sure we're dealing with strings. */
1220: convert_to_string_ex(subject);
1221: /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1222: ZVAL_STRINGL(&empty_replace, "", 0, 0);
1223:
1224: /* If regex is an array */
1225: if (Z_TYPE_P(regex) == IS_ARRAY) {
1226: /* Duplicate subject string for repeated replacement */
1227: subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1228: subject_len = Z_STRLEN_PP(subject);
1229: *result_len = subject_len;
1230:
1231: zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1232:
1233: replace_value = replace;
1234: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1235: zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1236:
1237: /* For each entry in the regex array, get the entry */
1238: while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1239: /* Make sure we're dealing with strings. */
1240: convert_to_string_ex(regex_entry);
1241:
1242: /* If replace is an array and not a callable construct */
1243: if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1244: /* Get current entry */
1245: if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1246: if (!is_callable_replace) {
1247: convert_to_string_ex(replace_entry);
1248: }
1249: replace_value = *replace_entry;
1250: zend_hash_move_forward(Z_ARRVAL_P(replace));
1251: } else {
1252: /* We've run out of replacement strings, so use an empty one */
1253: replace_value = &empty_replace;
1254: }
1255: }
1256:
1257: /* Do the actual replacement and put the result back into subject_value
1258: for further replacements. */
1259: if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1260: Z_STRLEN_PP(regex_entry),
1261: subject_value,
1262: subject_len,
1263: replace_value,
1264: is_callable_replace,
1265: result_len,
1266: limit,
1267: replace_count TSRMLS_CC)) != NULL) {
1268: efree(subject_value);
1269: subject_value = result;
1270: subject_len = *result_len;
1271: } else {
1272: efree(subject_value);
1273: return NULL;
1274: }
1275:
1276: zend_hash_move_forward(Z_ARRVAL_P(regex));
1277: }
1278:
1279: return subject_value;
1280: } else {
1281: result = php_pcre_replace(Z_STRVAL_P(regex),
1282: Z_STRLEN_P(regex),
1283: Z_STRVAL_PP(subject),
1284: Z_STRLEN_PP(subject),
1285: replace,
1286: is_callable_replace,
1287: result_len,
1288: limit,
1289: replace_count TSRMLS_CC);
1290: return result;
1291: }
1292: }
1293: /* }}} */
1294:
1295: /* {{{ preg_replace_impl
1296: */
1297: static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1298: {
1299: zval **regex,
1300: **replace,
1301: **subject,
1302: **subject_entry,
1303: **zcount = NULL;
1304: char *result;
1305: int result_len;
1306: int limit_val = -1;
1307: long limit = -1;
1308: char *string_key;
1309: ulong num_key;
1310: char *callback_name;
1311: int replace_count=0, old_replace_count;
1312:
1313: /* Get function parameters and do error-checking. */
1314: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1315: return;
1316: }
1317:
1318: if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1319: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1320: RETURN_FALSE;
1321: }
1322:
1323: SEPARATE_ZVAL(replace);
1324: if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1325: convert_to_string_ex(replace);
1326: }
1327: if (is_callable_replace) {
1328: if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1329: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1330: efree(callback_name);
1331: MAKE_COPY_ZVAL(subject, return_value);
1332: return;
1333: }
1334: efree(callback_name);
1335: }
1336:
1337: SEPARATE_ZVAL(regex);
1338: SEPARATE_ZVAL(subject);
1339:
1340: if (ZEND_NUM_ARGS() > 3) {
1341: limit_val = limit;
1342: }
1343:
1344: if (Z_TYPE_PP(regex) != IS_ARRAY)
1345: convert_to_string_ex(regex);
1346:
1347: /* if subject is an array */
1348: if (Z_TYPE_PP(subject) == IS_ARRAY) {
1349: array_init(return_value);
1350: zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1351:
1352: /* For each subject entry, convert it to string, then perform replacement
1353: and add the result to the return_value array. */
1354: while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1355: SEPARATE_ZVAL(subject_entry);
1356: old_replace_count = replace_count;
1357: if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1358: if (!is_filter || replace_count > old_replace_count) {
1359: /* Add to return array */
1360: switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1361: {
1362: case HASH_KEY_IS_STRING:
1363: add_assoc_stringl(return_value, string_key, result, result_len, 0);
1364: break;
1365:
1366: case HASH_KEY_IS_LONG:
1367: add_index_stringl(return_value, num_key, result, result_len, 0);
1368: break;
1369: }
1370: } else {
1371: efree(result);
1372: }
1373: }
1374:
1375: zend_hash_move_forward(Z_ARRVAL_PP(subject));
1376: }
1377: } else { /* if subject is not an array */
1378: old_replace_count = replace_count;
1379: if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1380: if (!is_filter || replace_count > old_replace_count) {
1381: RETVAL_STRINGL(result, result_len, 0);
1382: } else {
1383: efree(result);
1384: }
1385: }
1386: }
1387: if (ZEND_NUM_ARGS() > 4) {
1388: zval_dtor(*zcount);
1389: ZVAL_LONG(*zcount, replace_count);
1390: }
1391:
1392: }
1393: /* }}} */
1394:
1395: /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1396: Perform Perl-style regular expression replacement. */
1397: static PHP_FUNCTION(preg_replace)
1398: {
1399: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1400: }
1401: /* }}} */
1402:
1403: /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1404: Perform Perl-style regular expression replacement using replacement callback. */
1405: static PHP_FUNCTION(preg_replace_callback)
1406: {
1407: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1408: }
1409: /* }}} */
1410:
1411: /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1412: Perform Perl-style regular expression replacement and only return matches. */
1413: static PHP_FUNCTION(preg_filter)
1414: {
1415: preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1416: }
1417: /* }}} */
1418:
1419: /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1420: Split string into an array using a perl-style regular expression as a delimiter */
1421: static PHP_FUNCTION(preg_split)
1422: {
1423: char *regex; /* Regular expression */
1424: char *subject; /* String to match against */
1425: int regex_len;
1426: int subject_len;
1427: long limit_val = -1;/* Integer value of limit */
1428: long flags = 0; /* Match control flags */
1429: pcre_cache_entry *pce; /* Compiled regular expression */
1430:
1431: /* Get function parameters and do error checking */
1432: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1433: &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1434: RETURN_FALSE;
1435: }
1436:
1437: /* Compile regex or get it from cache. */
1438: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1439: RETURN_FALSE;
1440: }
1441:
1442: php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1443: }
1444: /* }}} */
1445:
1446: /* {{{ php_pcre_split
1447: */
1448: PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1449: long limit_val, long flags TSRMLS_DC)
1450: {
1451: pcre_extra *extra = NULL; /* Holds results of studying */
1452: pcre *re_bump = NULL; /* Regex instance for empty matches */
1453: pcre_extra *extra_bump = NULL; /* Almost dummy */
1454: pcre_extra extra_data; /* Used locally for exec options */
1455: int *offsets; /* Array of subpattern offsets */
1456: int size_offsets; /* Size of the offsets array */
1457: int exoptions = 0; /* Execution options */
1458: int count = 0; /* Count of matched subpatterns */
1459: int start_offset; /* Where the new search starts */
1460: int next_offset; /* End of the last delimiter match + 1 */
1461: int g_notempty = 0; /* If the match should not be empty */
1462: char *last_match; /* Location of last match */
1463: int rc;
1464: int no_empty; /* If NO_EMPTY flag is set */
1465: int delim_capture; /* If delimiters should be captured */
1466: int offset_capture; /* If offsets should be captured */
1467:
1468: no_empty = flags & PREG_SPLIT_NO_EMPTY;
1469: delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1470: offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1471:
1472: if (limit_val == 0) {
1473: limit_val = -1;
1474: }
1475:
1476: if (extra == NULL) {
1477: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1478: extra = &extra_data;
1479: }
1480: extra->match_limit = PCRE_G(backtrack_limit);
1481: extra->match_limit_recursion = PCRE_G(recursion_limit);
1482:
1483: /* Initialize return value */
1484: array_init(return_value);
1485:
1486: /* Calculate the size of the offsets array, and allocate memory for it. */
1487: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1488: if (rc < 0) {
1489: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1490: RETURN_FALSE;
1491: }
1492: size_offsets = (size_offsets + 1) * 3;
1493: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1494:
1495: /* Start at the beginning of the string */
1496: start_offset = 0;
1497: next_offset = 0;
1498: last_match = subject;
1499: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1500:
1501: /* Get next piece if no limit or limit not yet reached and something matched*/
1502: while ((limit_val == -1 || limit_val > 1)) {
1503: count = pcre_exec(pce->re, extra, subject,
1504: subject_len, start_offset,
1505: exoptions|g_notempty, offsets, size_offsets);
1506:
1507: /* the string was already proved to be valid UTF-8 */
1508: exoptions |= PCRE_NO_UTF8_CHECK;
1509:
1510: /* Check for too many substrings condition. */
1511: if (count == 0) {
1512: php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1513: count = size_offsets/3;
1514: }
1515:
1516: /* If something matched */
1517: if (count > 0) {
1518: if (!no_empty || &subject[offsets[0]] != last_match) {
1519:
1520: if (offset_capture) {
1521: /* Add (match, offset) pair to the return value */
1522: add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1523: } else {
1524: /* Add the piece to the return value */
1525: add_next_index_stringl(return_value, last_match,
1526: &subject[offsets[0]]-last_match, 1);
1527: }
1528:
1529: /* One less left to do */
1530: if (limit_val != -1)
1531: limit_val--;
1532: }
1533:
1534: last_match = &subject[offsets[1]];
1535: next_offset = offsets[1];
1536:
1537: if (delim_capture) {
1538: int i, match_len;
1539: for (i = 1; i < count; i++) {
1540: match_len = offsets[(i<<1)+1] - offsets[i<<1];
1541: /* If we have matched a delimiter */
1542: if (!no_empty || match_len > 0) {
1543: if (offset_capture) {
1544: add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1545: } else {
1546: add_next_index_stringl(return_value,
1547: &subject[offsets[i<<1]],
1548: match_len, 1);
1549: }
1550: }
1551: }
1552: }
1553: } else if (count == PCRE_ERROR_NOMATCH) {
1554: /* If we previously set PCRE_NOTEMPTY after a null match,
1555: this is not necessarily the end. We need to advance
1556: the start offset, and continue. Fudge the offset values
1557: to achieve this, unless we're already at the end of the string. */
1558: if (g_notempty != 0 && start_offset < subject_len) {
1559: if (pce->compile_options & PCRE_UTF8) {
1560: if (re_bump == NULL) {
1561: int dummy;
1562:
1563: if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1564: RETURN_FALSE;
1565: }
1566: }
1567: count = pcre_exec(re_bump, extra_bump, subject,
1568: subject_len, start_offset,
1569: exoptions, offsets, size_offsets);
1570: if (count < 1) {
1571: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1572: RETURN_FALSE;
1573: }
1574: } else {
1575: offsets[0] = start_offset;
1576: offsets[1] = start_offset + 1;
1577: }
1578: } else
1579: break;
1580: } else {
1581: pcre_handle_exec_error(count TSRMLS_CC);
1582: break;
1583: }
1584:
1585: /* If we have matched an empty string, mimic what Perl's /g options does.
1586: This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1587: the match again at the same point. If this fails (picked up above) we
1588: advance to the next character. */
1589: g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1590:
1591: /* Advance to the position right after the last full match */
1592: start_offset = offsets[1];
1593: }
1594:
1595:
1596: start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1597:
1598: if (!no_empty || start_offset < subject_len)
1599: {
1600: if (offset_capture) {
1601: /* Add the last (match, offset) pair to the return value */
1602: add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1603: } else {
1604: /* Add the last piece to the return value */
1605: add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1606: }
1607: }
1608:
1609:
1610: /* Clean up */
1611: efree(offsets);
1612: }
1613: /* }}} */
1614:
1615: /* {{{ proto string preg_quote(string str [, string delim_char])
1616: Quote regular expression characters plus an optional character */
1617: static PHP_FUNCTION(preg_quote)
1618: {
1619: int in_str_len;
1620: char *in_str; /* Input string argument */
1621: char *in_str_end; /* End of the input string */
1622: int delim_len = 0;
1623: char *delim = NULL; /* Additional delimiter argument */
1624: char *out_str, /* Output string with quoted characters */
1625: *p, /* Iterator for input string */
1626: *q, /* Iterator for output string */
1627: delim_char=0, /* Delimiter character to be quoted */
1628: c; /* Current character */
1629: zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1630:
1631: /* Get the arguments and check for errors */
1632: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1633: &delim, &delim_len) == FAILURE) {
1634: return;
1635: }
1636:
1637: in_str_end = in_str + in_str_len;
1638:
1639: /* Nothing to do if we got an empty string */
1640: if (in_str == in_str_end) {
1641: RETURN_EMPTY_STRING();
1642: }
1643:
1644: if (delim && *delim) {
1645: delim_char = delim[0];
1646: quote_delim = 1;
1647: }
1648:
1649: /* Allocate enough memory so that even if each character
1650: is quoted, we won't run out of room */
1651: out_str = safe_emalloc(4, in_str_len, 1);
1652:
1653: /* Go through the string and quote necessary characters */
1654: for(p = in_str, q = out_str; p != in_str_end; p++) {
1655: c = *p;
1656: switch(c) {
1657: case '.':
1658: case '\\':
1659: case '+':
1660: case '*':
1661: case '?':
1662: case '[':
1663: case '^':
1664: case ']':
1665: case '$':
1666: case '(':
1667: case ')':
1668: case '{':
1669: case '}':
1670: case '=':
1671: case '!':
1672: case '>':
1673: case '<':
1674: case '|':
1675: case ':':
1676: case '-':
1677: *q++ = '\\';
1678: *q++ = c;
1679: break;
1680:
1681: case '\0':
1682: *q++ = '\\';
1683: *q++ = '0';
1684: *q++ = '0';
1685: *q++ = '0';
1686: break;
1687:
1688: default:
1689: if (quote_delim && c == delim_char)
1690: *q++ = '\\';
1691: *q++ = c;
1692: break;
1693: }
1694: }
1695: *q = '\0';
1696:
1697: /* Reallocate string and return it */
1698: RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1699: }
1700: /* }}} */
1701:
1702: /* {{{ proto array preg_grep(string regex, array input [, int flags])
1703: Searches array and returns entries which match regex */
1704: static PHP_FUNCTION(preg_grep)
1705: {
1706: char *regex; /* Regular expression */
1707: int regex_len;
1708: zval *input; /* Input array */
1709: long flags = 0; /* Match control flags */
1710: pcre_cache_entry *pce; /* Compiled regular expression */
1711:
1712: /* Get arguments and do error checking */
1713: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1714: &input, &flags) == FAILURE) {
1715: return;
1716: }
1717:
1718: /* Compile regex or get it from cache. */
1719: if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1720: RETURN_FALSE;
1721: }
1722:
1723: php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1724: }
1725: /* }}} */
1726:
1727: PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1728: {
1729: zval **entry; /* An entry in the input array */
1730: pcre_extra *extra = pce->extra;/* Holds results of studying */
1731: pcre_extra extra_data; /* Used locally for exec options */
1732: int *offsets; /* Array of subpattern offsets */
1733: int size_offsets; /* Size of the offsets array */
1734: int count = 0; /* Count of matched subpatterns */
1735: char *string_key;
1736: ulong num_key;
1737: zend_bool invert; /* Whether to return non-matching
1738: entries */
1739: int rc;
1740:
1741: invert = flags & PREG_GREP_INVERT ? 1 : 0;
1742:
1743: if (extra == NULL) {
1744: extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1745: extra = &extra_data;
1746: }
1747: extra->match_limit = PCRE_G(backtrack_limit);
1748: extra->match_limit_recursion = PCRE_G(recursion_limit);
1749:
1750: /* Calculate the size of the offsets array, and allocate memory for it. */
1751: rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1752: if (rc < 0) {
1753: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1754: RETURN_FALSE;
1755: }
1756: size_offsets = (size_offsets + 1) * 3;
1757: offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1758:
1759: /* Initialize return array */
1760: array_init(return_value);
1761:
1762: PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1763:
1764: /* Go through the input array */
1765: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1766: while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1767: zval subject = **entry;
1768:
1769: if (Z_TYPE_PP(entry) != IS_STRING) {
1770: zval_copy_ctor(&subject);
1771: convert_to_string(&subject);
1772: }
1773:
1774: /* Perform the match */
1775: count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1776: Z_STRLEN(subject), 0,
1777: 0, offsets, size_offsets);
1778:
1779: /* Check for too many substrings condition. */
1780: if (count == 0) {
1781: php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1782: count = size_offsets/3;
1783: } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1784: pcre_handle_exec_error(count TSRMLS_CC);
1785: break;
1786: }
1787:
1788: /* If the entry fits our requirements */
1789: if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1790:
1791: Z_ADDREF_PP(entry);
1792:
1793: /* Add to return array */
1794: switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1795: {
1796: case HASH_KEY_IS_STRING:
1797: zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1798: strlen(string_key)+1, entry, sizeof(zval *), NULL);
1799: break;
1800:
1801: case HASH_KEY_IS_LONG:
1802: zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1803: sizeof(zval *), NULL);
1804: break;
1805: }
1806: }
1807:
1808: if (Z_TYPE_PP(entry) != IS_STRING) {
1809: zval_dtor(&subject);
1810: }
1811:
1812: zend_hash_move_forward(Z_ARRVAL_P(input));
1813: }
1814: zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1815: /* Clean up */
1816: efree(offsets);
1817: }
1818: /* }}} */
1819:
1820: /* {{{ proto int preg_last_error()
1821: Returns the error code of the last regexp execution. */
1822: static PHP_FUNCTION(preg_last_error)
1823: {
1824: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1825: return;
1826: }
1827:
1828: RETURN_LONG(PCRE_G(error_code));
1829: }
1830: /* }}} */
1831:
1832: /* {{{ module definition structures */
1833:
1834: /* {{{ arginfo */
/*
 * Argument descriptions for the functions registered in pcre_functions.
 * Per the Zend arginfo macros: the last number given to
 * ZEND_BEGIN_ARG_INFO_EX is the count of required arguments, and the first
 * argument of ZEND_ARG_INFO is 1 for a by-reference parameter, 0 otherwise.
 */
1835: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1836: ZEND_ARG_INFO(0, pattern)
1837: ZEND_ARG_INFO(0, subject)
1838: ZEND_ARG_INFO(1, subpatterns) /* array */
1839: ZEND_ARG_INFO(0, flags)
1840: ZEND_ARG_INFO(0, offset)
1841: ZEND_END_ARG_INFO()
1842:
1843: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3)
1844: ZEND_ARG_INFO(0, pattern)
1845: ZEND_ARG_INFO(0, subject)
1846: ZEND_ARG_INFO(1, subpatterns) /* array */
1847: ZEND_ARG_INFO(0, flags)
1848: ZEND_ARG_INFO(0, offset)
1849: ZEND_END_ARG_INFO()
1850:
/* Also used for preg_filter in pcre_functions. */
1851: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1852: ZEND_ARG_INFO(0, regex)
1853: ZEND_ARG_INFO(0, replace)
1854: ZEND_ARG_INFO(0, subject)
1855: ZEND_ARG_INFO(0, limit)
1856: ZEND_ARG_INFO(1, count)
1857: ZEND_END_ARG_INFO()
1858:
1859: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1860: ZEND_ARG_INFO(0, regex)
1861: ZEND_ARG_INFO(0, callback)
1862: ZEND_ARG_INFO(0, subject)
1863: ZEND_ARG_INFO(0, limit)
1864: ZEND_ARG_INFO(1, count)
1865: ZEND_END_ARG_INFO()
1866:
1867: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1868: ZEND_ARG_INFO(0, pattern)
1869: ZEND_ARG_INFO(0, subject)
1870: ZEND_ARG_INFO(0, limit)
1871: ZEND_ARG_INFO(0, flags)
1872: ZEND_END_ARG_INFO()
1873:
1874: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1875: ZEND_ARG_INFO(0, str)
1876: ZEND_ARG_INFO(0, delim_char)
1877: ZEND_END_ARG_INFO()
1878:
1879: ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1880: ZEND_ARG_INFO(0, regex)
1881: ZEND_ARG_INFO(0, input) /* array */
1882: ZEND_ARG_INFO(0, flags)
1883: ZEND_END_ARG_INFO()
1884:
/* preg_last_error() takes no arguments. */
1885: ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
1886: ZEND_END_ARG_INFO()
1887: /* }}} */
1888:
/* Table of the PHP functions exported by this extension, each paired with
 * its argument description. preg_filter reuses arginfo_preg_replace. The
 * table is terminated by PHP_FE_END. */
1889: static const zend_function_entry pcre_functions[] = {
1890: PHP_FE(preg_match, arginfo_preg_match)
1891: PHP_FE(preg_match_all, arginfo_preg_match_all)
1892: PHP_FE(preg_replace, arginfo_preg_replace)
1893: PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
1894: PHP_FE(preg_filter, arginfo_preg_replace)
1895: PHP_FE(preg_split, arginfo_preg_split)
1896: PHP_FE(preg_quote, arginfo_preg_quote)
1897: PHP_FE(preg_grep, arginfo_preg_grep)
1898: PHP_FE(preg_last_error, arginfo_preg_last_error)
1899: PHP_FE_END
1900: };
1901:
/* Module descriptor for the "pcre" extension: wires up the function table,
 * the module startup/shutdown and info hooks, and the per-module globals
 * with their init/shutdown hooks. The two NULL slots after
 * PHP_MSHUTDOWN(pcre) are presumably the per-request startup/shutdown
 * handlers, unused here -- confirm against the zend_module_entry layout. */
1902: zend_module_entry pcre_module_entry = {
1903: STANDARD_MODULE_HEADER,
1904: "pcre",
1905: pcre_functions,
1906: PHP_MINIT(pcre),
1907: PHP_MSHUTDOWN(pcre),
1908: NULL,
1909: NULL,
1910: PHP_MINFO(pcre),
1911: NO_VERSION_YET,
1912: PHP_MODULE_GLOBALS(pcre),
1913: PHP_GINIT(pcre),
1914: PHP_GSHUTDOWN(pcre),
1915: NULL,
1916: STANDARD_MODULE_PROPERTIES_EX
1917: };
1918:
/* Emitted only when COMPILE_DL_PCRE is defined. */
1919: #ifdef COMPILE_DL_PCRE
1920: ZEND_GET_MODULE(pcre)
1921: #endif
1922:
1923: /* }}} */
1924:
1925: #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1926:
1927: /*
1928: * Local variables:
1929: * tab-width: 4
1930: * c-basic-offset: 4
1931: * End:
1932: * vim600: sw=4 ts=4 fdm=marker
1933: * vim<600: sw=4 ts=4
1934: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>