File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / ereg / ereg.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:03:42 2014 UTC (10 years, 4 months ago) by misho
Branches: php, MAIN
CVS tags: v5_4_29, HEAD
php 5.4.29

    1: /*
    2:    +----------------------------------------------------------------------+
    3:    | PHP Version 5                                                        |
    4:    +----------------------------------------------------------------------+
    5:    | Copyright (c) 1997-2014 The PHP Group                                |
    6:    +----------------------------------------------------------------------+
    7:    | This source file is subject to version 3.01 of the PHP license,      |
    8:    | that is bundled with this package in the file LICENSE, and is        |
    9:    | available through the world-wide-web at the following url:           |
   10:    | http://www.php.net/license/3_01.txt                                  |
   11:    | If you did not receive a copy of the PHP license and are unable to   |
   12:    | obtain it through the world-wide-web, please send a note to          |
   13:    | license@php.net so we can mail you a copy immediately.               |
   14:    +----------------------------------------------------------------------+
   15:    | Authors: Rasmus Lerdorf <rasmus@php.net>                             |
   16:    |          Jim Winstead <jimw@php.net>                                 |
   17:    |          Jaakko Hyvätti <jaakko@hyvatti.iki.fi>                      | 
   18:    +----------------------------------------------------------------------+
   19:  */
   20: /* $Id: ereg.c,v 1.1.1.4 2014/06/15 20:03:42 misho Exp $ */
   21: 
   22: #include <stdio.h>
   23: #include <ctype.h>
   24: #include "php.h"
   25: #include "ext/standard/php_string.h"
   26: #include "php_ereg.h"
   27: #include "ext/standard/info.h"
   28: 
   29: /* {{{ arginfo */
/* Shared by ereg() and eregi(): pattern and string are required (2),
 * registers is the optional third argument and is declared by-reference
 * because the matcher overwrites it with an array of sub-matches. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string) 
	ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
ZEND_END_ARG_INFO()

/* Shared by ereg_replace() and eregi_replace(): three by-value arguments. */
ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, replacement)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()

/* Shared by split() and spliti(): pattern and string are required (2),
 * limit is optional. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2)
	ZEND_ARG_INFO(0, pattern)
	ZEND_ARG_INFO(0, string) 
	ZEND_ARG_INFO(0, limit)  
ZEND_END_ARG_INFO()

/* sql_regcase(): a single by-value string argument. */
ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0)
	ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()
   51: /* }}} */
   52: 
   53: /* {{{ Function table */
/* Table of the PHP-visible functions exported by this extension. Each entry
 * pairs a function name with the arginfo describing its parameters; the
 * case-insensitive variants reuse the arginfo of their case-sensitive
 * counterparts. PHP_FE_END terminates the table.
 * NOTE(review): PHP_DEP_FE presumably flags each function as deprecated --
 * confirm against the Zend API headers. */
const zend_function_entry ereg_functions[] = {
	PHP_DEP_FE(ereg,			arginfo_ereg)
	PHP_DEP_FE(ereg_replace,	arginfo_ereg_replace)
	PHP_DEP_FE(eregi,			arginfo_ereg)
	PHP_DEP_FE(eregi_replace,	arginfo_ereg_replace)
	PHP_DEP_FE(split,			arginfo_split)
	PHP_DEP_FE(spliti,			arginfo_split)
	PHP_DEP_FE(sql_regcase,		arginfo_sql_regcase)
	PHP_FE_END
};
   64: /* }}} */
   65: 
   66: /* {{{ reg_cache */
/* One entry of the compiled-pattern cache (stored by value in EREG(ht_rc),
 * keyed by the pattern text). */
typedef struct {
	regex_t preg;			/* the compiled expression */
	int cflags;				/* flags the expression was compiled with */
	unsigned long lastuse;	/* value of EREG(lru_counter) when the entry was stored */
} reg_cache;
/* re_magic of the first successfully compiled regex; 0 until one has been
 * seen. Used to detect corrupted cache entries when the regex library
 * exposes re_magic. */
static int reg_magic = 0;
/* Number of cached patterns at which the cache is trimmed or flushed. */
#define EREG_CACHE_SIZE 4096
   74: /* }}} */
   75: 
/* Storage for the extension's globals (the pattern cache ht_rc and its
 * lru_counter), plus forward declarations of the hooks that initialise and
 * tear them down; both hooks are defined further down in this file. */
ZEND_DECLARE_MODULE_GLOBALS(ereg)
static PHP_GINIT_FUNCTION(ereg);
static PHP_GSHUTDOWN_FUNCTION(ereg);
   79: 
   80: /* {{{ Module entry */
/* Module descriptor handed to the Zend engine. The extension registers its
 * function table, an info (phpinfo) callback and globals constructor /
 * destructor; every other hook slot is left NULL.
 * NOTE(review): the meaning of the individual NULL slots follows the
 * zend_module_entry layout in the Zend headers, which is not visible here. */
zend_module_entry ereg_module_entry = {
	STANDARD_MODULE_HEADER,
	"ereg",						/* extension name */
	ereg_functions,				/* exported functions */
	NULL,
	NULL,
	NULL,
	NULL,
	PHP_MINFO(ereg),			/* phpinfo() section */
	NO_VERSION_YET,
	PHP_MODULE_GLOBALS(ereg),
	PHP_GINIT(ereg),			/* sets up the pattern cache */
	PHP_GSHUTDOWN(ereg),		/* destroys the pattern cache */
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};
   97: /* }}} */
   98: 
   99: /* {{{ COMPILE_DL_EREG */
/* Only emitted when COMPILE_DL_EREG is defined.
 * NOTE(review): presumably this is the shared-object build and
 * ZEND_GET_MODULE exports the loader entry point -- confirm in the build
 * configuration. */
#ifdef COMPILE_DL_EREG
ZEND_GET_MODULE(ereg)
#endif
  103: /* }}} */
  104: 
  105: /* {{{ ereg_lru_cmp */
  106: static int ereg_lru_cmp(const void *a, const void *b TSRMLS_DC)
  107: {
  108: 	Bucket *f = *((Bucket **) a);
  109: 	Bucket *s = *((Bucket **) b);
  110: 
  111: 	if (((reg_cache *)f->pData)->lastuse <
  112: 				((reg_cache *)s->pData)->lastuse) {
  113: 		return -1;
  114: 	} else if (((reg_cache *)f->pData)->lastuse ==
  115: 				((reg_cache *)s->pData)->lastuse) {
  116: 		return 0;
  117: 	} else {
  118: 		return 1;
  119: 	}
  120: }
  121: /* }}} */
  122: 
  123: /* {{{ static ereg_clean_cache */
  124: static int ereg_clean_cache(void *data, void *arg TSRMLS_DC)
  125: {
  126: 	int *num_clean = (int *)arg;
  127: 
  128: 	if (*num_clean > 0) {
  129: 		(*num_clean)--;
  130: 		return ZEND_HASH_APPLY_REMOVE;
  131: 	} else {
  132: 		return ZEND_HASH_APPLY_STOP;
  133: 	}
  134: }
  135: /* }}} */
  136: 
  137: /* {{{ _php_regcomp
  138:  */
  139: static int _php_regcomp(regex_t *preg, const char *pattern, int cflags TSRMLS_DC)
  140: {
  141: 	int r = 0;
  142: 	int patlen = strlen(pattern);
  143: 	reg_cache *rc = NULL;
  144: 
  145: 	if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) {
  146: 		/* easier than dealing with overflow as it happens */
  147: 		if (EREG(lru_counter) >= (1 << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) {
  148: 			zend_hash_clean(&EREG(ht_rc));
  149: 			EREG(lru_counter) = 0;
  150: 		} else {
  151: 			int num_clean = EREG_CACHE_SIZE / 4;
  152: 			zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC);
  153: 		}
  154: 	}
  155: 
  156: 	if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
  157: 	   && rc->cflags == cflags) {
  158: #ifdef HAVE_REGEX_T_RE_MAGIC
  159: 		/*
  160: 		 * We use a saved magic number to see whether cache is corrupted, and if it
  161: 		 * is, we flush it and compile the pattern from scratch.
  162: 		 */
  163: 		if (rc->preg.re_magic != reg_magic) {
  164: 			zend_hash_clean(&EREG(ht_rc));
  165: 			EREG(lru_counter) = 0;
  166: 		} else {
  167: 			memcpy(preg, &rc->preg, sizeof(*preg));
  168: 			return r;
  169: 		}
  170: 	}
  171: 
  172: 	r = regcomp(preg, pattern, cflags);
  173: 	if(!r) {
  174: 		reg_cache rcp;
  175: 
  176: 		rcp.cflags = cflags;
  177: 		rcp.lastuse = ++(EREG(lru_counter));
  178: 		memcpy(&rcp.preg, preg, sizeof(*preg));
  179: 		/*
  180: 		 * Since we don't have access to the actual MAGIC1 definition in the private
  181: 		 * header file, we save the magic value immediately after compilation. Hopefully,
  182: 		 * it's good.
  183: 		 */
  184: 		if (!reg_magic) reg_magic = preg->re_magic;
  185: 		zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
  186: 						 (void *) &rcp, sizeof(rcp), NULL);
  187: 	}
  188: #else
  189: 		memcpy(preg, &rc->preg, sizeof(*preg));
  190: 	} else {
  191: 		r = regcomp(preg, pattern, cflags);
  192: 		if(!r) {
  193: 			reg_cache rcp;
  194: 
  195: 			rcp.cflags = cflags;
  196: 			rcp.lastuse = ++(EREG(lru_counter));
  197: 			memcpy(&rcp.preg, preg, sizeof(*preg));
  198: 			zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
  199: 							 (void *) &rcp, sizeof(rcp), NULL);
  200: 		}
  201: 	}
  202: #endif
  203: 	return r;
  204: }
  205: /* }}} */
  206: 
  207: static void _free_ereg_cache(reg_cache *rc) 
  208: {
  209: 	regfree(&rc->preg);
  210: }
  211: 
  212: #undef regfree
  213: #define regfree(a);
  214: #undef regcomp
  215: #define regcomp(a, b, c) _php_regcomp(a, b, c TSRMLS_CC)
  216: 
  217: /* {{{ PHP_GINIT_FUNCTION
  218:  */
  219: static PHP_GINIT_FUNCTION(ereg)
  220: {
  221: 	zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
  222: 	ereg_globals->lru_counter = 0;
  223: }
  224: /* }}} */
  225: 
  226: /* {{{ PHP_GSHUTDOWN_FUNCTION
  227:  */
/* Globals destructor: tears down the pattern cache. The table was created
 * with _free_ereg_cache as its element destructor, so destroying it is what
 * releases the cached compiled expressions. */
static PHP_GSHUTDOWN_FUNCTION(ereg)
{
	zend_hash_destroy(&ereg_globals->ht_rc);
}
  232: /* }}} */
  233: 
  234: PHP_MINFO_FUNCTION(ereg)
  235: {
  236: 	php_info_print_table_start();
  237: #if HSREGEX
  238: 	php_info_print_table_row(2, "Regex Library", "Bundled library enabled");
  239: #else
  240: 	php_info_print_table_row(2, "Regex Library", "System library enabled");
  241: #endif
  242: 	php_info_print_table_end();
  243: }
  244: 
  245: 
  246: /* {{{ php_ereg_eprint
  247:  * php_ereg_eprint - convert error number to name
  248:  */
  249: static void php_ereg_eprint(int err, regex_t *re TSRMLS_DC) {
  250: 	char *buf = NULL, *message = NULL;
  251: 	size_t len;
  252: 	size_t buf_len;
  253: 
  254: #ifdef REG_ITOA
  255: 	/* get the length of the message */
  256: 	buf_len = regerror(REG_ITOA | err, re, NULL, 0);
  257: 	if (buf_len) {
  258: 		buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
  259: 		if (!buf) return; /* fail silently */
  260: 		/* finally, get the error message */
  261: 		regerror(REG_ITOA | err, re, buf, buf_len);
  262: 	}
  263: #else
  264: 	buf_len = 0;
  265: #endif
  266: 	len = regerror(err, re, NULL, 0);
  267: 	if (len) {
  268: 		message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
  269: 		if (!message) {
  270: 			return; /* fail silently */
  271: 		}
  272: 		if (buf_len) {
  273: 			snprintf(message, buf_len, "%s: ", buf);
  274: 			buf_len += 1; /* so pointer math below works */
  275: 		}
  276: 		/* drop the message into place */
  277: 		regerror(err, re, message + buf_len, len);
  278: 
  279: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
  280: 	}
  281: 
  282: 	STR_FREE(buf);
  283: 	STR_FREE(message);
  284: }
  285: /* }}} */
  286: 
  287: /* {{{ php_ereg
  288:  */
  289: static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
  290: {
  291: 	zval **regex,			/* Regular expression */
  292: 		**array = NULL;		/* Optional register array */
  293: 	char *findin;		/* String to apply expression to */
  294: 	int findin_len;
  295: 	regex_t re;
  296: 	regmatch_t *subs;
  297: 	int err, match_len, string_len;
  298: 	uint i;
  299: 	int copts = 0;
  300: 	off_t start, end;
  301: 	char *buf = NULL;
  302: 	char *string = NULL;
  303: 	int   argc = ZEND_NUM_ARGS();
  304: 
  305: 	if (zend_parse_parameters(argc TSRMLS_CC, "Zs|Z", &regex, &findin, &findin_len, &array) == FAILURE) {
  306: 		return;
  307: 	}
  308: 
  309: 	if (icase) {
  310: 		copts |= REG_ICASE;
  311: 	}
  312: 	
  313: 	if (argc == 2) {
  314: 		copts |= REG_NOSUB;
  315: 	}
  316: 
  317: 	/* compile the regular expression from the supplied regex */
  318: 	if (Z_TYPE_PP(regex) == IS_STRING) {
  319: 		err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
  320: 	} else {
  321: 		/* we convert numbers to integers and treat them as a string */
  322: 		if (Z_TYPE_PP(regex) == IS_DOUBLE) {
  323: 			convert_to_long_ex(regex);	/* get rid of decimal places */
  324: 		}
  325: 		convert_to_string_ex(regex);
  326: 		/* don't bother doing an extended regex with just a number */
  327: 		err = regcomp(&re, Z_STRVAL_PP(regex), copts);
  328: 	}
  329: 
  330: 	if (err) {
  331: 		php_ereg_eprint(err, &re TSRMLS_CC);
  332: 		RETURN_FALSE;
  333: 	}
  334: 
  335: 	/* make a copy of the string we're looking in */
  336: 	string = estrndup(findin, findin_len);
  337: 
  338: 	/* allocate storage for (sub-)expression-matches */
  339: 	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
  340: 	
  341: 	/* actually execute the regular expression */
  342: 	err = regexec(&re, string, re.re_nsub+1, subs, 0);
  343: 	if (err && err != REG_NOMATCH) {
  344: 		php_ereg_eprint(err, &re TSRMLS_CC);
  345: 		regfree(&re);
  346: 		efree(subs);
  347: 		RETURN_FALSE;
  348: 	}
  349: 	match_len = 1;
  350: 
  351: 	if (array && err != REG_NOMATCH) {
  352: 		match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
  353: 		string_len = findin_len + 1;
  354: 
  355: 		buf = emalloc(string_len);
  356: 
  357: 		zval_dtor(*array);	/* start with clean array */
  358: 		array_init(*array);
  359: 
  360: 		for (i = 0; i <= re.re_nsub; i++) {
  361: 			start = subs[i].rm_so;
  362: 			end = subs[i].rm_eo;
  363: 			if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
  364: 				add_index_stringl(*array, i, string+start, end-start, 1);
  365: 			} else {
  366: 				add_index_bool(*array, i, 0);
  367: 			}
  368: 		}
  369: 		efree(buf);
  370: 	}
  371: 
  372: 	efree(subs);
  373: 	efree(string);
  374: 	if (err == REG_NOMATCH) {
  375: 		RETVAL_FALSE;
  376: 	} else {
  377: 		if (match_len == 0)
  378: 			match_len = 1;
  379: 		RETVAL_LONG(match_len);
  380: 	}
  381: 	regfree(&re);
  382: }
  383: /* }}} */
  384: 
  385: /* {{{ proto int ereg(string pattern, string string [, array registers])
  386:    Regular expression match */
PHP_FUNCTION(ereg)
{
	/* case-sensitive: icase = 0 */
	php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
  391: /* }}} */
  392: 
  393: /* {{{ proto int eregi(string pattern, string string [, array registers])
  394:    Case-insensitive regular expression match */
PHP_FUNCTION(eregi)
{
	/* case-insensitive: icase = 1 */
	php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
  399: /* }}} */
  400: 
  401: /* {{{ php_ereg_replace
  402:  * this is the meat and potatoes of regex replacement! */
  403: PHP_EREG_API char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended TSRMLS_DC)
  404: {
  405: 	regex_t re;
  406: 	regmatch_t *subs;
  407: 
  408: 	char *buf,	/* buf is where we build the replaced string */
  409: 		 *nbuf,	/* nbuf is used when we grow the buffer */
  410: 		 *walkbuf; /* used to walk buf when replacing backrefs */
  411: 	const char *walk; /* used to walk replacement string for backrefs */
  412: 	int buf_len;
  413: 	int pos, tmp, string_len, new_l;
  414: 	int err, copts = 0;
  415: 
  416: 	string_len = strlen(string);
  417: 
  418: 	if (icase) {
  419: 		copts = REG_ICASE;
  420: 	}
  421: 	if (extended) {
  422: 		copts |= REG_EXTENDED;
  423: 	}
  424: 
  425: 	err = regcomp(&re, pattern, copts);
  426: 	if (err) {
  427: 		php_ereg_eprint(err, &re TSRMLS_CC);
  428: 		return ((char *) -1);
  429: 	}
  430: 
  431: 
  432: 	/* allocate storage for (sub-)expression-matches */
  433: 	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
  434: 
  435: 	/* start with a buffer that is twice the size of the stringo
  436: 	   we're doing replacements in */
  437: 	buf_len = 2 * string_len + 1;
  438: 	buf = safe_emalloc(buf_len, sizeof(char), 0);
  439: 
  440: 	err = pos = 0;
  441: 	buf[0] = '\0';
  442: 	while (!err) {
  443: 		err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
  444: 
  445: 		if (err && err != REG_NOMATCH) {
  446: 			php_ereg_eprint(err, &re TSRMLS_CC);
  447: 			efree(subs);
  448: 			efree(buf);
  449: 			regfree(&re);
  450: 			return ((char *) -1);
  451: 		}
  452: 
  453: 		if (!err) {
  454: 			/* backref replacement is done in two passes:
  455: 			   1) find out how long the string will be, and allocate buf
  456: 			   2) copy the part before match, replacement and backrefs to buf
  457: 
  458: 			   Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi>
  459: 			   */
  460: 
  461: 			new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
  462: 			walk = replace;
  463: 			while (*walk) {
  464: 				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
  465: 					if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) {
  466: 						new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
  467: 					}
  468: 					walk += 2;
  469: 				} else {
  470: 					new_l++;
  471: 					walk++;
  472: 				}
  473: 			}
  474: 			if (new_l + 1 > buf_len) {
  475: 				buf_len = 1 + buf_len + 2 * new_l;
  476: 				nbuf = emalloc(buf_len);
  477: 				strncpy(nbuf, buf, buf_len - 1);
  478: 				nbuf[buf_len - 1] = '\0';
  479: 				efree(buf);
  480: 				buf = nbuf;
  481: 			}
  482: 			tmp = strlen(buf);
  483: 			/* copy the part of the string before the match */
  484: 			strncat(buf, &string[pos], subs[0].rm_so);
  485: 
  486: 			/* copy replacement and backrefs */
  487: 			walkbuf = &buf[tmp + subs[0].rm_so];
  488: 			walk = replace;
  489: 			while (*walk) {
  490: 				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && (unsigned char)walk[1] - '0' <= (int)re.re_nsub) {
  491: 					if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
  492: 						/* this next case shouldn't happen. it does. */
  493: 						&& subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
  494: 						
  495: 						tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
  496: 						memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
  497: 						walkbuf += tmp;
  498: 					}
  499: 					walk += 2;
  500: 				} else {
  501: 					*walkbuf++ = *walk++;
  502: 				}
  503: 			}
  504: 			*walkbuf = '\0';
  505: 
  506: 			/* and get ready to keep looking for replacements */
  507: 			if (subs[0].rm_so == subs[0].rm_eo) {
  508: 				if (subs[0].rm_so + pos >= string_len) {
  509: 					break;
  510: 				}
  511: 				new_l = strlen (buf) + 1;
  512: 				if (new_l + 1 > buf_len) {
  513: 					buf_len = 1 + buf_len + 2 * new_l;
  514: 					nbuf = safe_emalloc(buf_len, sizeof(char), 0);
  515: 					strncpy(nbuf, buf, buf_len-1);
  516: 					efree(buf);
  517: 					buf = nbuf;
  518: 				}
  519: 				pos += subs[0].rm_eo + 1;
  520: 				buf [new_l-1] = string [pos-1];
  521: 				buf [new_l] = '\0';
  522: 			} else {
  523: 				pos += subs[0].rm_eo;
  524: 			}
  525: 		} else { /* REG_NOMATCH */
  526: 			new_l = strlen(buf) + strlen(&string[pos]);
  527: 			if (new_l + 1 > buf_len) {
  528: 				buf_len = new_l + 1; /* now we know exactly how long it is */
  529: 				nbuf = safe_emalloc(buf_len, sizeof(char), 0);
  530: 				strncpy(nbuf, buf, buf_len-1);
  531: 				efree(buf);
  532: 				buf = nbuf;
  533: 			}
  534: 			/* stick that last bit of string on our output */
  535: 			strlcat(buf, &string[pos], buf_len);
  536: 		}
  537: 	}
  538: 
  539: 	/* don't want to leak memory .. */
  540: 	efree(subs);
  541: 	regfree(&re);
  542: 
  543: 	/* whew. */
  544: 	return (buf);
  545: }
  546: /* }}} */
  547: 
  548: /* {{{ php_do_ereg_replace
  549:  */
  550: static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
  551: {
  552: 	zval **arg_pattern,
  553: 		**arg_replace;
  554: 	char *pattern, *arg_string;
  555: 	char *string;
  556: 	char *replace;
  557: 	char *ret;
  558: 	int arg_string_len;
  559: 	
  560: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZs", &arg_pattern, &arg_replace, &arg_string, &arg_string_len) == FAILURE) {
  561: 		return;
  562: 	}
  563: 
  564: 	if (Z_TYPE_PP(arg_pattern) == IS_STRING) {
  565: 		if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) {
  566: 			pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
  567: 		} else {
  568: 			pattern = STR_EMPTY_ALLOC();
  569: 		}
  570: 	} else {
  571: 		convert_to_long_ex(arg_pattern);
  572: 		pattern = emalloc(2);
  573: 		pattern[0] = (char) Z_LVAL_PP(arg_pattern);
  574: 		pattern[1] = '\0';
  575: 	}
  576: 
  577: 	if (Z_TYPE_PP(arg_replace) == IS_STRING) {
  578: 		if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) {
  579: 			replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
  580: 		} else {
  581: 			replace = STR_EMPTY_ALLOC();
  582: 		}
  583: 	} else {
  584: 		convert_to_long_ex(arg_replace);
  585: 		replace = emalloc(2);
  586: 		replace[0] = (char) Z_LVAL_PP(arg_replace);
  587: 		replace[1] = '\0';
  588: 	}
  589: 
  590: 	if (arg_string && arg_string_len) {
  591: 		string = estrndup(arg_string, arg_string_len);
  592: 	} else {
  593: 		string = STR_EMPTY_ALLOC();
  594: 	}
  595: 
  596: 	/* do the actual work */
  597: 	ret = php_ereg_replace(pattern, replace, string, icase, 1 TSRMLS_CC);
  598: 	if (ret == (char *) -1) {
  599: 		RETVAL_FALSE;
  600: 	} else {
  601: 		RETVAL_STRING(ret, 1);
  602: 		STR_FREE(ret);
  603: 	}
  604: 
  605: 	STR_FREE(string);
  606: 	STR_FREE(replace);
  607: 	STR_FREE(pattern);
  608: }
  609: /* }}} */
  610: 
  611: /* {{{ proto string ereg_replace(string pattern, string replacement, string string)
  612:    Replace regular expression */
PHP_FUNCTION(ereg_replace)
{
	/* case-sensitive: icase = 0 */
	php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
  617: /* }}} */
  618: 
  619: /* {{{ proto string eregi_replace(string pattern, string replacement, string string)
  620:    Case insensitive replace regular expression */
PHP_FUNCTION(eregi_replace)
{
	/* case-insensitive: icase = 1 */
	php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
  625: /* }}} */
  626: 
  627: /* {{{ php_split
  628:  */
  629: static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
  630: {
  631: 	long count = -1;
  632: 	regex_t re;
  633: 	regmatch_t subs[1];
  634: 	char *spliton, *str, *strp, *endp;
  635: 	int spliton_len, str_len;
  636: 	int err, size, copts = 0;
  637: 
  638: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) {
  639: 		return;
  640: 	}
  641: 
  642: 	if (icase) {
  643: 		copts = REG_ICASE;
  644: 	}
  645: 
  646: 	strp = str;
  647: 	endp = strp + str_len;
  648: 
  649: 	err = regcomp(&re, spliton, REG_EXTENDED | copts);
  650: 	if (err) {
  651: 		php_ereg_eprint(err, &re TSRMLS_CC);
  652: 		RETURN_FALSE;
  653: 	}
  654: 
  655: 	array_init(return_value);
  656: 
  657: 	/* churn through str, generating array entries as we go */
  658: 	while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
  659: 		if (subs[0].rm_so == 0 && subs[0].rm_eo) {
  660: 			/* match is at start of string, return empty string */
  661: 			add_next_index_stringl(return_value, "", 0, 1);
  662: 			/* skip ahead the length of the regex match */
  663: 			strp += subs[0].rm_eo;
  664: 		} else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
  665: 			/* No more matches */
  666: 			regfree(&re);
  667: 			
  668: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression");
  669: 			
  670: 			zend_hash_destroy(Z_ARRVAL_P(return_value));
  671: 			efree(Z_ARRVAL_P(return_value));
  672: 			RETURN_FALSE;
  673: 		} else {
  674: 			/* On a real match */
  675: 
  676: 			/* make a copy of the substring */
  677: 			size = subs[0].rm_so;
  678: 		
  679: 			/* add it to the array */
  680: 			add_next_index_stringl(return_value, strp, size, 1);
  681: 
  682: 			/* point at our new starting point */
  683: 			strp = strp + subs[0].rm_eo;
  684: 		}
  685: 
  686: 		/* if we're only looking for a certain number of points,
  687: 		   stop looking once we hit it */
  688: 		if (count != -1) {
  689: 			count--;
  690: 		}
  691: 	}
  692: 
  693: 	/* see if we encountered an error */
  694: 	if (err && err != REG_NOMATCH) {
  695: 		php_ereg_eprint(err, &re TSRMLS_CC);
  696: 		regfree(&re);
  697: 		zend_hash_destroy(Z_ARRVAL_P(return_value));
  698: 		efree(Z_ARRVAL_P(return_value));
  699: 		RETURN_FALSE;
  700: 	}
  701: 
  702: 	/* otherwise we just have one last element to add to the array */
  703: 	size = endp - strp;
  704: 	
  705: 	add_next_index_stringl(return_value, strp, size, 1);
  706: 
  707: 	regfree(&re);
  708: }
  709: /* }}} */
  710: 
  711: /* {{{ proto array split(string pattern, string string [, int limit])
  712:    Split string into array by regular expression */
PHP_FUNCTION(split)
{
	/* case-sensitive: icase = 0 */
	php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
  717: /* }}} */
  718: 
  719: /* {{{ proto array spliti(string pattern, string string [, int limit])
  720:    Split string into array by regular expression case-insensitive */
  721: 
PHP_FUNCTION(spliti)
{
	/* case-insensitive: icase = 1 */
	php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
  726: 
  727: /* }}} */
  728: 
  729: /* {{{ proto string sql_regcase(string string)
  730:    Make regular expression for case insensitive match */
  731: PHP_EREG_API PHP_FUNCTION(sql_regcase)
  732: {
  733: 	char *string, *tmp;
  734: 	int string_len;
  735: 	unsigned char c;
  736: 	register int i, j;
  737: 
  738: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) {
  739: 		return;
  740: 	}
  741: 	
  742: 	tmp = safe_emalloc(string_len, 4, 1);
  743: 	
  744: 	for (i = j = 0; i < string_len; i++) {
  745: 		c = (unsigned char) string[i];
  746: 		if (isalpha(c)) {
  747: 			tmp[j++] = '[';
  748: 			tmp[j++] = toupper(c);
  749: 			tmp[j++] = tolower(c);
  750: 			tmp[j++] = ']';
  751: 		} else {
  752: 			tmp[j++] = c;
  753: 		}
  754: 	}
  755: 	tmp[j] = 0;
  756: 
  757: 	RETVAL_STRINGL(tmp, j, 1);
  758: 	efree(tmp);
  759: }
  760: /* }}} */
  761: 
  762: /*
  763:  * Local variables:
  764:  * tab-width: 4
  765:  * c-basic-offset: 4
  766:  * End:
  767:  * vim600: noet sw=4 ts=4 fdm=marker
  768:  * vim<600: noet sw=4 ts=4
  769:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>