File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / php_mbregex.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:03:49 2014 UTC (10 years, 1 month ago) by misho
Branches: php, MAIN
CVS tags: v5_4_29, HEAD
php 5.4.29

    1: /*
    2:    +----------------------------------------------------------------------+
    3:    | PHP Version 5                                                        |
    4:    +----------------------------------------------------------------------+
    5:    | Copyright (c) 1997-2014 The PHP Group                                |
    6:    +----------------------------------------------------------------------+
    7:    | This source file is subject to version 3.01 of the PHP license,      |
    8:    | that is bundled with this package in the file LICENSE, and is        |
    9:    | available through the world-wide-web at the following url:           |
   10:    | http://www.php.net/license/3_01.txt                                  |
   11:    | If you did not receive a copy of the PHP license and are unable to   |
   12:    | obtain it through the world-wide-web, please send a note to          |
   13:    | license@php.net so we can mail you a copy immediately.               |
   14:    +----------------------------------------------------------------------+
   15:    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
   16:    +----------------------------------------------------------------------+
   17:  */
   18: 
   19: /* $Id: php_mbregex.c,v 1.1.1.4 2014/06/15 20:03:49 misho Exp $ */
   20: 
   21: 
   22: #ifdef HAVE_CONFIG_H
   23: #include "config.h"
   24: #endif
   25: 
   26: #include "php.h"
   27: #include "php_ini.h"
   28: 
   29: #if HAVE_MBREGEX
   30: 
   31: #include "ext/standard/php_smart_str.h"
   32: #include "ext/standard/info.h"
   33: #include "php_mbregex.h"
   34: #include "mbstring.h"
   35:  
   36: #include "php_onig_compat.h" /* must come prior to the oniguruma header */
   37: #include <oniguruma.h>
   38: #undef UChar
   39: 
   40: ZEND_EXTERN_MODULE_GLOBALS(mbstring)
   41: 
   42: struct _zend_mb_regex_globals {
   43: 	OnigEncoding default_mbctype;
   44: 	OnigEncoding current_mbctype;
   45: 	HashTable ht_rc;
   46: 	zval *search_str;
   47: 	zval *search_str_val;
   48: 	unsigned int search_pos;
   49: 	php_mb_regex_t *search_re;
   50: 	OnigRegion *search_regs;
   51: 	OnigOptionType regex_default_options;
   52: 	OnigSyntaxType *regex_default_syntax;
   53: };
   54: 
   55: #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
   56: 
   57: /* {{{ static void php_mb_regex_free_cache() */
   58: static void php_mb_regex_free_cache(php_mb_regex_t **pre) 
   59: {
   60: 	onig_free(*pre);
   61: }
   62: /* }}} */
   63: 
   64: /* {{{ _php_mb_regex_globals_ctor */
   65: static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
   66: {
   67: 	pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
   68: 	pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
   69: 	zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
   70: 	pglobals->search_str = (zval*) NULL;
   71: 	pglobals->search_re = (php_mb_regex_t*)NULL;
   72: 	pglobals->search_pos = 0;
   73: 	pglobals->search_regs = (OnigRegion*)NULL;
   74: 	pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
   75: 	pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
   76: 	return SUCCESS;
   77: }
   78: /* }}} */
   79: 
   80: /* {{{ _php_mb_regex_globals_dtor */
   81: static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC) 
   82: {
   83: 	zend_hash_destroy(&pglobals->ht_rc);
   84: }
   85: /* }}} */
   86: 
   87: /* {{{ php_mb_regex_globals_alloc */
   88: zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
   89: {
   90: 	zend_mb_regex_globals *pglobals = pemalloc(
   91: 			sizeof(zend_mb_regex_globals), 1);
   92: 	if (!pglobals) {
   93: 		return NULL;
   94: 	}
   95: 	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
   96: 		pefree(pglobals, 1);
   97: 		return NULL;
   98: 	}
   99: 	return pglobals;
  100: }
  101: /* }}} */
  102: 
  103: /* {{{ php_mb_regex_globals_free */
  104: void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
  105: {
  106: 	if (!pglobals) {
  107: 		return;
  108: 	}
  109: 	_php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  110: 	pefree(pglobals, 1);
  111: }
  112: /* }}} */
  113: 
  114: /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
  115: PHP_MINIT_FUNCTION(mb_regex)
  116: {
  117: 	onig_init();
  118: 	return SUCCESS;
  119: }
  120: /* }}} */
  121: 
  122: /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
  123: PHP_MSHUTDOWN_FUNCTION(mb_regex)
  124: {
  125: 	onig_end();
  126: 	return SUCCESS;
  127: }
  128: /* }}} */
  129: 
  130: /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
  131: PHP_RINIT_FUNCTION(mb_regex)
  132: {
  133: 	return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
  134: }
  135: /* }}} */
  136: 
  137: /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
  138: PHP_RSHUTDOWN_FUNCTION(mb_regex)
  139: {
  140: 	MBREX(current_mbctype) = MBREX(default_mbctype);
  141: 
  142: 	if (MBREX(search_str) != NULL) {
  143: 		zval_ptr_dtor(&MBREX(search_str));
  144: 		MBREX(search_str) = (zval *)NULL;
  145: 	}
  146: 	MBREX(search_pos) = 0;
  147: 
  148: 	if (MBREX(search_regs) != NULL) {
  149: 		onig_region_free(MBREX(search_regs), 1);
  150: 		MBREX(search_regs) = (OnigRegion *)NULL;
  151: 	}
  152: 	zend_hash_clean(&MBREX(ht_rc));
  153: 
  154: 	return SUCCESS;
  155: }
  156: /* }}} */
  157: 
  158: /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
  159: PHP_MINFO_FUNCTION(mb_regex)
  160: {
  161: 	char buf[32];
  162: 	php_info_print_table_start();
  163: 	php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  164: 	snprintf(buf, sizeof(buf), "%d.%d.%d",
  165: 			ONIGURUMA_VERSION_MAJOR,
  166: 			ONIGURUMA_VERSION_MINOR,
  167: 			ONIGURUMA_VERSION_TEENY);
  168: #ifdef PHP_ONIG_BUNDLED
  169: #ifdef USE_COMBINATION_EXPLOSION_CHECK
  170: 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
  171: #else	/* USE_COMBINATION_EXPLOSION_CHECK */
  172: 	php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
  173: #endif	/* USE_COMBINATION_EXPLOSION_CHECK */
  174: #endif /* PHP_BUNDLED_ONIG */
  175: 	php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  176: 	php_info_print_table_end();
  177: }
  178: /* }}} */
  179: 
  180: /*
  181:  * encoding name resolver
  182:  */
  183: 
  184: /* {{{ encoding name map */
  185: typedef struct _php_mb_regex_enc_name_map_t {
  186: 	const char *names;
  187: 	OnigEncoding code;
  188: } php_mb_regex_enc_name_map_t;
  189: 
  190: php_mb_regex_enc_name_map_t enc_name_map[] = {
  191: #ifdef ONIG_ENCODING_EUC_JP
  192: 	{
  193: 		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
  194: 		ONIG_ENCODING_EUC_JP
  195: 	},
  196: #endif
  197: #ifdef ONIG_ENCODING_UTF8
  198: 	{
  199: 		"UTF-8\0UTF8\0",
  200: 		ONIG_ENCODING_UTF8
  201: 	},
  202: #endif
  203: #ifdef ONIG_ENCODING_UTF16_BE
  204: 	{
  205: 		"UTF-16\0UTF-16BE\0",
  206: 		ONIG_ENCODING_UTF16_BE
  207: 	},
  208: #endif
  209: #ifdef ONIG_ENCODING_UTF16_LE
  210: 	{
  211: 		"UTF-16LE\0",
  212: 		ONIG_ENCODING_UTF16_LE
  213: 	},
  214: #endif
  215: #ifdef ONIG_ENCODING_UTF32_BE
  216: 	{
  217: 		"UCS-4\0UTF-32\0UTF-32BE\0",
  218: 		ONIG_ENCODING_UTF32_BE
  219: 	},
  220: #endif
  221: #ifdef ONIG_ENCODING_UTF32_LE
  222: 	{
  223: 		"UCS-4LE\0UTF-32LE\0",
  224: 		ONIG_ENCODING_UTF32_LE
  225: 	},
  226: #endif
  227: #ifdef ONIG_ENCODING_SJIS
  228: 	{
  229: 		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
  230: 		ONIG_ENCODING_SJIS
  231: 	},
  232: #endif
  233: #ifdef ONIG_ENCODING_BIG5
  234: 	{
  235: 		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
  236: 		ONIG_ENCODING_BIG5
  237: 	},
  238: #endif
  239: #ifdef ONIG_ENCODING_EUC_CN
  240: 	{
  241: 		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
  242: 		ONIG_ENCODING_EUC_CN
  243: 	},
  244: #endif
  245: #ifdef ONIG_ENCODING_EUC_TW
  246: 	{
  247: 		"EUC-TW\0EUCTW\0EUC_TW\0",
  248: 		ONIG_ENCODING_EUC_TW
  249: 	},
  250: #endif
  251: #ifdef ONIG_ENCODING_EUC_KR
  252: 	{
  253: 		"EUC-KR\0EUCKR\0EUC_KR\0",
  254: 		ONIG_ENCODING_EUC_KR
  255: 	},
  256: #endif
  257: #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
  258: 	{
  259: 		"KOI8\0KOI-8\0",
  260: 		ONIG_ENCODING_KOI8
  261: 	},
  262: #endif
  263: #ifdef ONIG_ENCODING_KOI8_R
  264: 	{
  265: 		"KOI8R\0KOI8-R\0KOI-8R\0",
  266: 		ONIG_ENCODING_KOI8_R
  267: 	},
  268: #endif
  269: #ifdef ONIG_ENCODING_ISO_8859_1
  270: 	{
  271: 		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
  272: 		ONIG_ENCODING_ISO_8859_1
  273: 	},
  274: #endif
  275: #ifdef ONIG_ENCODING_ISO_8859_2
  276: 	{
  277: 		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
  278: 		ONIG_ENCODING_ISO_8859_2
  279: 	},
  280: #endif
  281: #ifdef ONIG_ENCODING_ISO_8859_3
  282: 	{
  283: 		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
  284: 		ONIG_ENCODING_ISO_8859_3
  285: 	},
  286: #endif
  287: #ifdef ONIG_ENCODING_ISO_8859_4
  288: 	{
  289: 		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
  290: 		ONIG_ENCODING_ISO_8859_4
  291: 	},
  292: #endif
  293: #ifdef ONIG_ENCODING_ISO_8859_5
  294: 	{
  295: 		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
  296: 		ONIG_ENCODING_ISO_8859_5
  297: 	},
  298: #endif
  299: #ifdef ONIG_ENCODING_ISO_8859_6
  300: 	{
  301: 		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
  302: 		ONIG_ENCODING_ISO_8859_6
  303: 	},
  304: #endif
  305: #ifdef ONIG_ENCODING_ISO_8859_7
  306: 	{
  307: 		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
  308: 		ONIG_ENCODING_ISO_8859_7
  309: 	},
  310: #endif
  311: #ifdef ONIG_ENCODING_ISO_8859_8
  312: 	{
  313: 		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
  314: 		ONIG_ENCODING_ISO_8859_8
  315: 	},
  316: #endif
  317: #ifdef ONIG_ENCODING_ISO_8859_9
  318: 	{
  319: 		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
  320: 		ONIG_ENCODING_ISO_8859_9
  321: 	},
  322: #endif
  323: #ifdef ONIG_ENCODING_ISO_8859_10
  324: 	{
  325: 		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
  326: 		ONIG_ENCODING_ISO_8859_10
  327: 	},
  328: #endif
  329: #ifdef ONIG_ENCODING_ISO_8859_11
  330: 	{
  331: 		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
  332: 		ONIG_ENCODING_ISO_8859_11
  333: 	},
  334: #endif
  335: #ifdef ONIG_ENCODING_ISO_8859_13
  336: 	{
  337: 		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
  338: 		ONIG_ENCODING_ISO_8859_13
  339: 	},
  340: #endif
  341: #ifdef ONIG_ENCODING_ISO_8859_14
  342: 	{
  343: 		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
  344: 		ONIG_ENCODING_ISO_8859_14
  345: 	},
  346: #endif
  347: #ifdef ONIG_ENCODING_ISO_8859_15
  348: 	{
  349: 		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
  350: 		ONIG_ENCODING_ISO_8859_15
  351: 	},
  352: #endif
  353: #ifdef ONIG_ENCODING_ISO_8859_16
  354: 	{
  355: 		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
  356: 		ONIG_ENCODING_ISO_8859_16
  357: 	},
  358: #endif
  359: #ifdef ONIG_ENCODING_ASCII
  360: 	{
  361: 		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
  362: 		ONIG_ENCODING_ASCII
  363: 	},
  364: #endif
  365: 	{ NULL, ONIG_ENCODING_UNDEF }
  366: };
  367: /* }}} */
  368: 
  369: /* {{{ _php_mb_regex_name2mbctype */
  370: static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
  371: {
  372: 	const char *p;
  373: 	php_mb_regex_enc_name_map_t *mapping;
  374: 
  375: 	if (pname == NULL || !*pname) {
  376: 		return ONIG_ENCODING_UNDEF;
  377: 	}
  378: 
  379: 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  380: 		for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
  381: 			if (strcasecmp(p, pname) == 0) {
  382: 				return mapping->code;
  383: 			}
  384: 		}
  385: 	}
  386: 
  387: 	return ONIG_ENCODING_UNDEF;
  388: }
  389: /* }}} */
  390: 
  391: /* {{{ _php_mb_regex_mbctype2name */
  392: static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
  393: {
  394: 	php_mb_regex_enc_name_map_t *mapping;
  395: 
  396: 	for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  397: 		if (mapping->code == mbctype) {
  398: 			return mapping->names;
  399: 		}
  400: 	}
  401: 
  402: 	return NULL;
  403: }
  404: /* }}} */
  405: 
  406: /* {{{ php_mb_regex_set_mbctype */
  407: int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
  408: {
  409: 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  410: 	if (mbctype == ONIG_ENCODING_UNDEF) {
  411: 		return FAILURE;
  412: 	}
  413: 	MBREX(current_mbctype) = mbctype;
  414: 	return SUCCESS;
  415: }
  416: /* }}} */
  417: 
  418: /* {{{ php_mb_regex_set_default_mbctype */
  419: int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
  420: {
  421: 	OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  422: 	if (mbctype == ONIG_ENCODING_UNDEF) {
  423: 		return FAILURE;
  424: 	}
  425: 	MBREX(default_mbctype) = mbctype;
  426: 	return SUCCESS;
  427: }
  428: /* }}} */
  429: 
  430: /* {{{ php_mb_regex_get_mbctype */
  431: const char *php_mb_regex_get_mbctype(TSRMLS_D)
  432: {
  433: 	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  434: }
  435: /* }}} */
  436: 
  437: /* {{{ php_mb_regex_get_default_mbctype */
  438: const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
  439: {
  440: 	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
  441: }
  442: /* }}} */
  443: 
  444: /*
  445:  * regex cache
  446:  */
  447: /* {{{ php_mbregex_compile_pattern */
  448: static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
  449: {
  450: 	int err_code = 0;
  451: 	int found = 0;
  452: 	php_mb_regex_t *retval = NULL, **rc = NULL;
  453: 	OnigErrorInfo err_info;
  454: 	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  455: 
  456: 	found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
  457: 	if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
  458: 		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
  459: 			onig_error_code_to_str(err_str, err_code, err_info);
  460: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
  461: 			retval = NULL;
  462: 			goto out;
  463: 		}
  464: 		zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
  465: 	} else if (found == SUCCESS) {
  466: 		retval = *rc;
  467: 	}
  468: out:
  469: 	return retval; 
  470: }
  471: /* }}} */
  472: 
  473: /* {{{ _php_mb_regex_get_option_string */
  474: static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
  475: {
  476: 	size_t len_left = len;
  477: 	size_t len_req = 0;
  478: 	char *p = str;
  479: 	char c;
  480: 
  481: 	if ((option & ONIG_OPTION_IGNORECASE) != 0) {
  482: 		if (len_left > 0) {
  483: 			--len_left;
  484: 			*(p++) = 'i';
  485: 		}
  486: 		++len_req;	
  487: 	}
  488: 
  489: 	if ((option & ONIG_OPTION_EXTEND) != 0) {
  490: 		if (len_left > 0) {
  491: 			--len_left;
  492: 			*(p++) = 'x';
  493: 		}
  494: 		++len_req;	
  495: 	}
  496: 
  497: 	if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
  498: 			(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
  499: 		if (len_left > 0) {
  500: 			--len_left;
  501: 			*(p++) = 'p';
  502: 		}
  503: 		++len_req;	
  504: 	} else {
  505: 		if ((option & ONIG_OPTION_MULTILINE) != 0) {
  506: 			if (len_left > 0) {
  507: 				--len_left;
  508: 				*(p++) = 'm';
  509: 			}
  510: 			++len_req;	
  511: 		}
  512: 
  513: 		if ((option & ONIG_OPTION_SINGLELINE) != 0) {
  514: 			if (len_left > 0) {
  515: 				--len_left;
  516: 				*(p++) = 's';
  517: 			}
  518: 			++len_req;	
  519: 		}
  520: 	}	
  521: 	if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
  522: 		if (len_left > 0) {
  523: 			--len_left;
  524: 			*(p++) = 'l';
  525: 		}
  526: 		++len_req;	
  527: 	}
  528: 	if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
  529: 		if (len_left > 0) {
  530: 			--len_left;
  531: 			*(p++) = 'n';
  532: 		}
  533: 		++len_req;	
  534: 	}
  535: 
  536: 	c = 0;
  537: 
  538: 	if (syntax == ONIG_SYNTAX_JAVA) {
  539: 		c = 'j';
  540: 	} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
  541: 		c = 'u';
  542: 	} else if (syntax == ONIG_SYNTAX_GREP) {
  543: 		c = 'g';
  544: 	} else if (syntax == ONIG_SYNTAX_EMACS) {
  545: 		c = 'c';
  546: 	} else if (syntax == ONIG_SYNTAX_RUBY) {
  547: 		c = 'r';
  548: 	} else if (syntax == ONIG_SYNTAX_PERL) {
  549: 		c = 'z';
  550: 	} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
  551: 		c = 'b';
  552: 	} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
  553: 		c = 'd';
  554: 	}
  555: 
  556: 	if (c != 0) {
  557: 		if (len_left > 0) {
  558: 			--len_left;
  559: 			*(p++) = c;
  560: 		}
  561: 		++len_req;
  562: 	}
  563: 
  564: 
  565: 	if (len_left > 0) {
  566: 		--len_left;
  567: 		*(p++) = '\0';
  568: 	}
  569: 	++len_req;	
  570: 	if (len < len_req) {
  571: 		return len_req;
  572: 	}
  573: 
  574: 	return 0;
  575: }
  576: /* }}} */
  577: 
  578: /* {{{ _php_mb_regex_init_options */
  579: static void
  580: _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) 
  581: {
  582: 	int n;
  583: 	char c;
  584: 	int optm = 0; 
  585: 
  586: 	*syntax = ONIG_SYNTAX_RUBY;
  587: 
  588: 	if (parg != NULL) {
  589: 		n = 0;
  590: 		while(n < narg) {
  591: 			c = parg[n++];
  592: 			switch (c) {
  593: 				case 'i':
  594: 					optm |= ONIG_OPTION_IGNORECASE;
  595: 					break;
  596: 				case 'x':
  597: 					optm |= ONIG_OPTION_EXTEND;
  598: 					break;
  599: 				case 'm':
  600: 					optm |= ONIG_OPTION_MULTILINE;
  601: 					break;
  602: 				case 's':
  603: 					optm |= ONIG_OPTION_SINGLELINE;
  604: 					break;
  605: 				case 'p':
  606: 					optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  607: 					break;
  608: 				case 'l':
  609: 					optm |= ONIG_OPTION_FIND_LONGEST;
  610: 					break;
  611: 				case 'n':
  612: 					optm |= ONIG_OPTION_FIND_NOT_EMPTY;
  613: 					break;
  614: 				case 'j':
  615: 					*syntax = ONIG_SYNTAX_JAVA;
  616: 					break;
  617: 				case 'u':
  618: 					*syntax = ONIG_SYNTAX_GNU_REGEX;
  619: 					break;
  620: 				case 'g':
  621: 					*syntax = ONIG_SYNTAX_GREP;
  622: 					break;
  623: 				case 'c':
  624: 					*syntax = ONIG_SYNTAX_EMACS;
  625: 					break;
  626: 				case 'r':
  627: 					*syntax = ONIG_SYNTAX_RUBY;
  628: 					break;
  629: 				case 'z':
  630: 					*syntax = ONIG_SYNTAX_PERL;
  631: 					break;
  632: 				case 'b':
  633: 					*syntax = ONIG_SYNTAX_POSIX_BASIC;
  634: 					break;
  635: 				case 'd':
  636: 					*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
  637: 					break;
  638: 				case 'e':
  639: 					if (eval != NULL) *eval = 1; 
  640: 					break;
  641: 				default:
  642: 					break;
  643: 			}
  644: 		}
  645: 		if (option != NULL) *option|=optm; 
  646: 	}
  647: }
  648: /* }}} */
  649: 
  650: /*
  651:  * php functions
  652:  */
  653: 
  654: /* {{{ proto string mb_regex_encoding([string encoding])
  655:    Returns the current encoding for regex as a string. */
  656: PHP_FUNCTION(mb_regex_encoding)
  657: {
  658: 	size_t argc = ZEND_NUM_ARGS();
  659: 	char *encoding;
  660: 	int encoding_len;
  661: 	OnigEncoding mbctype;
  662: 
  663: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
  664: 		return;
  665: 	}
  666: 
  667: 	if (argc == 0) {
  668: 		const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  669: 
  670: 		if (retval == NULL) {
  671: 			RETURN_FALSE;
  672: 		}
  673: 
  674: 		RETURN_STRING((char *)retval, 1);
  675: 	} else if (argc == 1) {
  676: 		mbctype = _php_mb_regex_name2mbctype(encoding);
  677: 
  678: 		if (mbctype == ONIG_ENCODING_UNDEF) {
  679: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  680: 			RETURN_FALSE;
  681: 		}
  682: 
  683: 		MBREX(current_mbctype) = mbctype;
  684: 		RETURN_TRUE;
  685: 	}
  686: }
  687: /* }}} */
  688: 
  689: /* {{{ _php_mb_regex_ereg_exec */
  690: static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
  691: {
  692: 	zval **arg_pattern, *array;
  693: 	char *string;
  694: 	int string_len;
  695: 	php_mb_regex_t *re;
  696: 	OnigRegion *regs = NULL;
  697: 	int i, match_len, beg, end;
  698: 	OnigOptionType options;
  699: 	char *str;
  700: 
  701: 	array = NULL;
  702: 
  703: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
  704: 		RETURN_FALSE;
  705: 	}
  706: 
  707: 	options = MBREX(regex_default_options);
  708: 	if (icase) {
  709: 		options |= ONIG_OPTION_IGNORECASE;
  710: 	}
  711: 
  712: 	/* compile the regular expression from the supplied regex */
  713: 	if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
  714: 		/* we convert numbers to integers and treat them as a string */
  715: 		if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
  716: 			convert_to_long_ex(arg_pattern);	/* get rid of decimal places */
  717: 		}
  718: 		convert_to_string_ex(arg_pattern);
  719: 		/* don't bother doing an extended regex with just a number */
  720: 	}
  721: 
  722: 	if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
  723: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
  724: 		RETVAL_FALSE;
  725: 		goto out;
  726: 	}
  727: 
  728: 	re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
  729: 	if (re == NULL) {
  730: 		RETVAL_FALSE;
  731: 		goto out;
  732: 	}
  733: 
  734: 	regs = onig_region_new();
  735: 
  736: 	/* actually execute the regular expression */
  737: 	if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
  738: 		RETVAL_FALSE;
  739: 		goto out;
  740: 	}
  741: 
  742: 	match_len = 1;
  743: 	str = string;
  744: 	if (array != NULL) {
  745: 		match_len = regs->end[0] - regs->beg[0];
  746: 		zval_dtor(array);
  747: 		array_init(array);
  748: 		for (i = 0; i < regs->num_regs; i++) {
  749: 			beg = regs->beg[i];
  750: 			end = regs->end[i];
  751: 			if (beg >= 0 && beg < end && end <= string_len) {
  752: 				add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
  753: 			} else {
  754: 				add_index_bool(array, i, 0);
  755: 			}
  756: 		}
  757: 	}
  758: 
  759: 	if (match_len == 0) {
  760: 		match_len = 1;
  761: 	}
  762: 	RETVAL_LONG(match_len);
  763: out:
  764: 	if (regs != NULL) {
  765: 		onig_region_free(regs, 1);
  766: 	}
  767: }
  768: /* }}} */
  769: 
  770: /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
  771:    Regular expression match for multibyte string */
  772: PHP_FUNCTION(mb_ereg)
  773: {
  774: 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  775: }
  776: /* }}} */
  777: 
  778: /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
  779:    Case-insensitive regular expression match for multibyte string */
  780: PHP_FUNCTION(mb_eregi)
  781: {
  782: 	_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  783: }
  784: /* }}} */
  785: 
  786: /* {{{ _php_mb_regex_ereg_replace_exec */
  787: static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
  788: {
  789: 	zval **arg_pattern_zval;
  790: 
  791: 	char *arg_pattern;
  792: 	int arg_pattern_len;
  793: 
  794: 	char *replace;
  795: 	int replace_len;
  796: 
  797: 	zend_fcall_info arg_replace_fci;
  798: 	zend_fcall_info_cache arg_replace_fci_cache;
  799: 
  800: 	char *string;
  801: 	int string_len;
  802: 
  803: 	char *p;
  804: 	php_mb_regex_t *re;
  805: 	OnigSyntaxType *syntax;
  806: 	OnigRegion *regs = NULL;
  807: 	smart_str out_buf = { 0 };
  808: 	smart_str eval_buf = { 0 };
  809: 	smart_str *pbuf;
  810: 	int i, err, eval, n;
  811: 	OnigUChar *pos;
  812: 	OnigUChar *string_lim;
  813: 	char *description = NULL;
  814: 	char pat_buf[2];
  815: 
  816: 	const mbfl_encoding *enc;
  817: 
  818: 	{
  819: 		const char *current_enc_name;
  820: 		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  821: 		if (current_enc_name == NULL ||
  822: 			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
  823: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
  824: 			RETURN_FALSE;
  825: 		}
  826: 	}
  827: 	eval = 0;
  828: 	{
  829: 		char *option_str = NULL;
  830: 		int option_str_len = 0;
  831: 
  832: 		if (!is_callable) {
  833: 			if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
  834: 						&arg_pattern_zval,
  835: 						&replace, &replace_len,
  836: 						&string, &string_len,
  837: 						&option_str, &option_str_len) == FAILURE) {
  838: 				RETURN_FALSE;
  839: 			}
  840: 		} else {
  841: 			if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
  842: 						&arg_pattern_zval,
  843: 						&arg_replace_fci, &arg_replace_fci_cache,
  844: 						&string, &string_len,
  845: 						&option_str, &option_str_len) == FAILURE) {
  846: 				RETURN_FALSE;
  847: 			}
  848: 		}
  849: 
  850: 		if (option_str != NULL) {
  851: 			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
  852: 		} else {
  853: 			options |= MBREX(regex_default_options);
  854: 			syntax = MBREX(regex_default_syntax);
  855: 		}
  856: 	}
  857: 	if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
  858: 		arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
  859: 		arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
  860: 	} else {
  861: 		/* FIXME: this code is not multibyte aware! */
  862: 		convert_to_long_ex(arg_pattern_zval);
  863: 		pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);	
  864: 		pat_buf[1] = '\0';
  865: 
  866: 		arg_pattern = pat_buf;
  867: 		arg_pattern_len = 1;	
  868: 	}
  869: 	/* create regex pattern buffer */
  870: 	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
  871: 	if (re == NULL) {
  872: 		RETURN_FALSE;
  873: 	}
  874: 
  875: 	if (eval || is_callable) {
  876: 		pbuf = &eval_buf;
  877: 		description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
  878: 	} else {
  879: 		pbuf = &out_buf;
  880: 		description = NULL;
  881: 	}
  882: 
  883: 	if (is_callable) {
  884: 		if (eval) {
  885: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
  886: 			RETURN_FALSE;
  887: 		}
  888: 	}
  889: 
  890: 	/* do the actual work */
  891: 	err = 0;
  892: 	pos = (OnigUChar *)string;
  893: 	string_lim = (OnigUChar*)(string + string_len);
  894: 	regs = onig_region_new();
  895: 	while (err >= 0) {
  896: 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
  897: 		if (err <= -2) {
  898: 			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  899: 			onig_error_code_to_str(err_str, err);
  900: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
  901: 			break;
  902: 		}
  903: 		if (err >= 0) {
  904: #if moriyoshi_0
  905: 			if (regs->beg[0] == regs->end[0]) {
  906: 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  907: 				break;
  908: 			}
  909: #endif
  910: 			/* copy the part of the string before the match */
  911: 			smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
  912: 
  913: 			if (!is_callable) {
  914: 				/* copy replacement and backrefs */
  915: 				i = 0;
  916: 				p = replace;
  917: 				while (i < replace_len) {
  918: 					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
  919: 					n = -1;
  920: 					if ((replace_len - i) >= 2 && fwd == 1 &&
  921: 					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
  922: 						n = p[1] - '0';
  923: 					}
  924: 					if (n >= 0 && n < regs->num_regs) {
  925: 						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
  926: 							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
  927: 						}
  928: 						p += 2;
  929: 						i += 2;
  930: 					} else {
  931: 						smart_str_appendl(pbuf, p, fwd);
  932: 						p += fwd;
  933: 						i += fwd;
  934: 					}
  935: 				}
  936: 			}
  937: 				
  938: 			if (eval) {
  939: 				zval v;
  940: 				/* null terminate buffer */
  941: 				smart_str_0(&eval_buf);
  942: 				/* do eval */
  943: 				if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
  944: 					efree(description);
  945: 					php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
  946: 					/* zend_error() does not return in this case */
  947: 				}
  948: 
  949: 				/* result of eval */
  950: 				convert_to_string(&v);
  951: 				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
  952: 				/* Clean up */
  953: 				eval_buf.len = 0;
  954: 				zval_dtor(&v);
  955: 			} else if (is_callable) {
  956: 				zval *retval_ptr;
  957: 				zval **args[1];
  958: 				zval *subpats;
  959: 				int i;
  960: 				
  961: 				MAKE_STD_ZVAL(subpats);
  962: 				array_init(subpats);
  963: 				
  964: 				for (i = 0; i < regs->num_regs; i++) {
  965: 					add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1);
  966: 				}				
  967: 				
  968: 				args[0] = &subpats;
  969: 				/* null terminate buffer */
  970: 				smart_str_0(&eval_buf);
  971: 				
  972: 				arg_replace_fci.param_count = 1;
  973: 				arg_replace_fci.params = args;
  974: 				arg_replace_fci.retval_ptr_ptr = &retval_ptr;
  975: 				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) {
  976: 					convert_to_string_ex(&retval_ptr);
  977: 					smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
  978: 					eval_buf.len = 0;
  979: 					zval_ptr_dtor(&retval_ptr);
  980: 				} else {
  981: 					efree(description);
  982: 					if (!EG(exception)) {
  983: 						php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
  984: 					}
  985: 				}
  986: 				zval_ptr_dtor(&subpats);
  987: 			}
  988: 
  989: 			n = regs->end[0];
  990: 			if ((pos - (OnigUChar *)string) < n) {
  991: 				pos = (OnigUChar *)string + n;
  992: 			} else {
  993: 				if (pos < string_lim) {
  994: 					smart_str_appendl(&out_buf, pos, 1); 
  995: 				}
  996: 				pos++;
  997: 			}
  998: 		} else { /* nomatch */
  999: 			/* stick that last bit of string on our output */
 1000: 			if (string_lim - pos > 0) {
 1001: 				smart_str_appendl(&out_buf, pos, string_lim - pos);
 1002: 			}
 1003: 		}
 1004: 		onig_region_free(regs, 0);
 1005: 	}
 1006: 
 1007: 	if (description) {
 1008: 		efree(description);
 1009: 	}
 1010: 	if (regs != NULL) {
 1011: 		onig_region_free(regs, 1);
 1012: 	}
 1013: 	smart_str_free(&eval_buf);
 1014: 
 1015: 	if (err <= -2) {
 1016: 		smart_str_free(&out_buf);	
 1017: 		RETVAL_FALSE;
 1018: 	} else {
 1019: 		smart_str_appendc(&out_buf, '\0');
 1020: 		RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
 1021: 	}
 1022: }
 1023: /* }}} */
 1024: 
 1025: /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
 1026:    Replace regular expression for multibyte string */
 1027: PHP_FUNCTION(mb_ereg_replace)
 1028: {
 1029: 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
 1030: }
 1031: /* }}} */
 1032: 
 1033: /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
 1034:    Case insensitive replace regular expression for multibyte string */
 1035: PHP_FUNCTION(mb_eregi_replace)
 1036: {
 1037: 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
 1038: }
 1039: /* }}} */
 1040: 
 1041: /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
 1042:     regular expression for multibyte string using replacement callback */
 1043: PHP_FUNCTION(mb_ereg_replace_callback)
 1044: {
 1045: 	_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
 1046: }
 1047: /* }}} */
 1048: 
 1049: /* {{{ proto array mb_split(string pattern, string string [, int limit])
 1050:    split multibyte string into array by regular expression */
 1051: PHP_FUNCTION(mb_split)
 1052: {
 1053: 	char *arg_pattern;
 1054: 	int arg_pattern_len;
 1055: 	php_mb_regex_t *re;
 1056: 	OnigRegion *regs = NULL;
 1057: 	char *string;
 1058: 	OnigUChar *pos, *chunk_pos;
 1059: 	int string_len;
 1060: 
 1061: 	int n, err;
 1062: 	long count = -1;
 1063: 
 1064: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
 1065: 		RETURN_FALSE;
 1066: 	} 
 1067: 
 1068: 	if (count > 0) {
 1069: 		count--;
 1070: 	}
 1071: 
 1072: 	/* create regex pattern buffer */
 1073: 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
 1074: 		RETURN_FALSE;
 1075: 	}
 1076: 
 1077: 	array_init(return_value);
 1078: 
 1079: 	chunk_pos = pos = (OnigUChar *)string;
 1080: 	err = 0;
 1081: 	regs = onig_region_new();
 1082: 	/* churn through str, generating array entries as we go */
 1083: 	while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
 1084: 		int beg, end;
 1085: 		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
 1086: 		if (err < 0) {
 1087: 			break;
 1088: 		}
 1089: 		beg = regs->beg[0], end = regs->end[0];
 1090: 		/* add it to the array */
 1091: 		if ((pos - (OnigUChar *)string) < end) {
 1092: 			if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
 1093: 				add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
 1094: 				--count;
 1095: 			} else {
 1096: 				err = -2;
 1097: 				break;
 1098: 			}
 1099: 			/* point at our new starting point */
 1100: 			chunk_pos = pos = (OnigUChar *)string + end;
 1101: 		} else {
 1102: 			pos++;
 1103: 		}
 1104: 		onig_region_free(regs, 0);
 1105: 	}
 1106: 
 1107: 	onig_region_free(regs, 1);
 1108: 
 1109: 	/* see if we encountered an error */
 1110: 	if (err <= -2) {
 1111: 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
 1112: 		onig_error_code_to_str(err_str, err);
 1113: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
 1114: 		zval_dtor(return_value);
 1115: 		RETURN_FALSE;
 1116: 	}
 1117: 
 1118: 	/* otherwise we just have one last element to add to the array */
 1119: 	n = ((OnigUChar *)(string + string_len) - chunk_pos);
 1120: 	if (n > 0) {
 1121: 		add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
 1122: 	} else {
 1123: 		add_next_index_stringl(return_value, "", 0, 1);
 1124: 	}
 1125: }
 1126: /* }}} */
 1127: 
 1128: /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
 1129:    Regular expression match for multibyte string */
 1130: PHP_FUNCTION(mb_ereg_match)
 1131: {
 1132: 	char *arg_pattern;
 1133: 	int arg_pattern_len;
 1134: 
 1135: 	char *string;
 1136: 	int string_len;
 1137: 
 1138: 	php_mb_regex_t *re;
 1139: 	OnigSyntaxType *syntax;
 1140: 	OnigOptionType option = 0;
 1141: 	int err;
 1142: 
 1143: 	{
 1144: 		char *option_str = NULL;
 1145: 		int option_str_len = 0;
 1146: 
 1147: 		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
 1148: 		                          &arg_pattern, &arg_pattern_len, &string, &string_len,
 1149: 		                          &option_str, &option_str_len)==FAILURE) {
 1150: 			RETURN_FALSE;
 1151: 		}
 1152: 
 1153: 		if (option_str != NULL) {
 1154: 			_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
 1155: 		} else {
 1156: 			option |= MBREX(regex_default_options);
 1157: 			syntax = MBREX(regex_default_syntax);
 1158: 		}
 1159: 	}
 1160: 
 1161: 	if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
 1162: 		RETURN_FALSE;
 1163: 	}
 1164: 
 1165: 	/* match */
 1166: 	err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
 1167: 	if (err >= 0) {
 1168: 		RETVAL_TRUE;
 1169: 	} else {
 1170: 		RETVAL_FALSE;
 1171: 	}
 1172: }
 1173: /* }}} */
 1174: 
 1175: /* regex search */
 1176: /* {{{ _php_mb_regex_ereg_search_exec */
 1177: static void
 1178: _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
 1179: {
 1180: 	size_t argc = ZEND_NUM_ARGS();
 1181: 	char *arg_pattern, *arg_options;
 1182: 	int arg_pattern_len, arg_options_len;
 1183: 	int n, i, err, pos, len, beg, end;
 1184: 	OnigOptionType option;
 1185: 	OnigUChar *str;
 1186: 	OnigSyntaxType *syntax;
 1187: 
 1188: 	if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
 1189: 		return;
 1190: 	}
 1191: 
 1192: 	option = MBREX(regex_default_options);
 1193: 
 1194: 	if (argc == 2) {
 1195: 		option = 0;
 1196: 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
 1197: 	}
 1198: 
 1199: 	if (argc > 0) {
 1200: 		/* create regex pattern buffer */
 1201: 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
 1202: 			RETURN_FALSE;
 1203: 		}
 1204: 	}
 1205: 
 1206: 	pos = MBREX(search_pos);
 1207: 	str = NULL;
 1208: 	len = 0;
 1209: 	if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
 1210: 		str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
 1211: 		len = Z_STRLEN_P(MBREX(search_str));
 1212: 	}
 1213: 
 1214: 	if (MBREX(search_re) == NULL) {
 1215: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
 1216: 		RETURN_FALSE;
 1217: 	}
 1218: 
 1219: 	if (str == NULL) {
 1220: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
 1221: 		RETURN_FALSE;
 1222: 	}
 1223: 
 1224: 	if (MBREX(search_regs)) {
 1225: 		onig_region_free(MBREX(search_regs), 1);
 1226: 	}
 1227: 	MBREX(search_regs) = onig_region_new();
 1228: 
 1229: 	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
 1230: 	if (err == ONIG_MISMATCH) {
 1231: 		MBREX(search_pos) = len;
 1232: 		RETVAL_FALSE;
 1233: 	} else if (err <= -2) {
 1234: 		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
 1235: 		onig_error_code_to_str(err_str, err);
 1236: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
 1237: 		RETVAL_FALSE;
 1238: 	} else {
 1239: 		if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
 1240: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
 1241: 		}
 1242: 		switch (mode) {
 1243: 		case 1:
 1244: 			array_init(return_value);
 1245: 			beg = MBREX(search_regs)->beg[0];
 1246: 			end = MBREX(search_regs)->end[0];
 1247: 			add_next_index_long(return_value, beg);
 1248: 			add_next_index_long(return_value, end - beg);
 1249: 			break;
 1250: 		case 2:
 1251: 			array_init(return_value);
 1252: 			n = MBREX(search_regs)->num_regs;
 1253: 			for (i = 0; i < n; i++) {
 1254: 				beg = MBREX(search_regs)->beg[i];
 1255: 				end = MBREX(search_regs)->end[i];
 1256: 				if (beg >= 0 && beg <= end && end <= len) {
 1257: 					add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
 1258: 				} else {
 1259: 					add_index_bool(return_value, i, 0);
 1260: 				}
 1261: 			}
 1262: 			break;
 1263: 		default:
 1264: 			RETVAL_TRUE;
 1265: 			break;
 1266: 		}
 1267: 		end = MBREX(search_regs)->end[0];
 1268: 		if (pos < end) {
 1269: 			MBREX(search_pos) = end;
 1270: 		} else {
 1271: 			MBREX(search_pos) = pos + 1;
 1272: 		}
 1273: 	}
 1274: 
 1275: 	if (err < 0) {
 1276: 		onig_region_free(MBREX(search_regs), 1);
 1277: 		MBREX(search_regs) = (OnigRegion *)NULL;
 1278: 	}
 1279: }
 1280: /* }}} */
 1281: 
 1282: /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
 1283:    Regular expression search for multibyte string */
 1284: PHP_FUNCTION(mb_ereg_search)
 1285: {
 1286: 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
 1287: }
 1288: /* }}} */
 1289: 
 1290: /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
 1291:    Regular expression search for multibyte string */
 1292: PHP_FUNCTION(mb_ereg_search_pos)
 1293: {
 1294: 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
 1295: }
 1296: /* }}} */
 1297: 
 1298: /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
 1299:    Regular expression search for multibyte string */
 1300: PHP_FUNCTION(mb_ereg_search_regs)
 1301: {
 1302: 	_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
 1303: }
 1304: /* }}} */
 1305: 
 1306: /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
 1307:    Initialize string and regular expression for search. */
 1308: PHP_FUNCTION(mb_ereg_search_init)
 1309: {
 1310: 	size_t argc = ZEND_NUM_ARGS();
 1311: 	zval *arg_str;
 1312: 	char *arg_pattern = NULL, *arg_options = NULL;
 1313: 	int arg_pattern_len = 0, arg_options_len = 0;
 1314: 	OnigSyntaxType *syntax = NULL;
 1315: 	OnigOptionType option;
 1316: 
 1317: 	if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
 1318: 		return;
 1319: 	}
 1320: 	
 1321: 	if (argc > 1 && arg_pattern_len == 0) {
 1322: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
 1323: 		RETURN_FALSE;
 1324: 	}
 1325: 
 1326: 	option = MBREX(regex_default_options);
 1327: 	syntax = MBREX(regex_default_syntax);
 1328: 
 1329: 	if (argc == 3) {
 1330: 		option = 0;
 1331: 		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
 1332: 	}
 1333: 
 1334: 	if (argc > 1) {
 1335: 		/* create regex pattern buffer */
 1336: 		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
 1337: 			RETURN_FALSE;
 1338: 		}
 1339: 	}
 1340: 
 1341: 	if (MBREX(search_str) != NULL) {
 1342: 		zval_ptr_dtor(&MBREX(search_str));
 1343: 		MBREX(search_str) = (zval *)NULL;
 1344: 	}
 1345: 
 1346: 	MBREX(search_str) = arg_str;
 1347: 	Z_ADDREF_P(MBREX(search_str));
 1348: 	SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
 1349: 
 1350: 	MBREX(search_pos) = 0;
 1351: 
 1352: 	if (MBREX(search_regs) != NULL) {
 1353: 		onig_region_free(MBREX(search_regs), 1);
 1354: 		MBREX(search_regs) = (OnigRegion *) NULL;
 1355: 	}
 1356: 
 1357: 	RETURN_TRUE;
 1358: }
 1359: /* }}} */
 1360: 
 1361: /* {{{ proto array mb_ereg_search_getregs(void)
 1362:    Get matched substring of the last time */
 1363: PHP_FUNCTION(mb_ereg_search_getregs)
 1364: {
 1365: 	int n, i, len, beg, end;
 1366: 	OnigUChar *str;
 1367: 
 1368: 	if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
 1369: 		array_init(return_value);
 1370: 
 1371: 		str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
 1372: 		len = Z_STRLEN_P(MBREX(search_str));
 1373: 		n = MBREX(search_regs)->num_regs;
 1374: 		for (i = 0; i < n; i++) {
 1375: 			beg = MBREX(search_regs)->beg[i];
 1376: 			end = MBREX(search_regs)->end[i];
 1377: 			if (beg >= 0 && beg <= end && end <= len) {
 1378: 				add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
 1379: 			} else {
 1380: 				add_index_bool(return_value, i, 0);
 1381: 			}
 1382: 		}
 1383: 	} else {
 1384: 		RETVAL_FALSE;
 1385: 	}
 1386: }
 1387: /* }}} */
 1388: 
 1389: /* {{{ proto int mb_ereg_search_getpos(void)
 1390:    Get search start position */
 1391: PHP_FUNCTION(mb_ereg_search_getpos)
 1392: {
 1393: 	RETVAL_LONG(MBREX(search_pos));
 1394: }
 1395: /* }}} */
 1396: 
 1397: /* {{{ proto bool mb_ereg_search_setpos(int position)
 1398:    Set search start position */
 1399: PHP_FUNCTION(mb_ereg_search_setpos)
 1400: {
 1401: 	long position;
 1402: 
 1403: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
 1404: 		return;
 1405: 	}
 1406: 
 1407: 	if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
 1408: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
 1409: 		MBREX(search_pos) = 0;
 1410: 		RETURN_FALSE;
 1411: 	}
 1412: 
 1413: 	MBREX(search_pos) = position;
 1414: 	RETURN_TRUE;
 1415: }
 1416: /* }}} */
 1417: 
 1418: /* {{{ php_mb_regex_set_options */
 1419: static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC) 
 1420: {
 1421: 	if (prev_options != NULL) {
 1422: 		*prev_options = MBREX(regex_default_options);
 1423: 	}
 1424: 	if (prev_syntax != NULL) {
 1425: 		*prev_syntax = MBREX(regex_default_syntax);
 1426: 	}
 1427: 	MBREX(regex_default_options) = options;
 1428: 	MBREX(regex_default_syntax) = syntax;
 1429: }
 1430: /* }}} */
 1431: 
 1432: /* {{{ proto string mb_regex_set_options([string options])
 1433:    Set or get the default options for mbregex functions */
 1434: PHP_FUNCTION(mb_regex_set_options)
 1435: {
 1436: 	OnigOptionType opt;
 1437: 	OnigSyntaxType *syntax;
 1438: 	char *string = NULL;
 1439: 	int string_len;
 1440: 	char buf[16];
 1441: 
 1442: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
 1443: 	                          &string, &string_len) == FAILURE) {
 1444: 		RETURN_FALSE;
 1445: 	}
 1446: 	if (string != NULL) {
 1447: 		opt = 0;
 1448: 		syntax = NULL;
 1449: 		_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
 1450: 		_php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
 1451: 	} else {
 1452: 		opt = MBREX(regex_default_options);
 1453: 		syntax = MBREX(regex_default_syntax);
 1454: 	}
 1455: 	_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
 1456: 
 1457: 	RETVAL_STRING(buf, 1);
 1458: }
 1459: /* }}} */
 1460: 
 1461: #endif	/* HAVE_MBREGEX */
 1462: 
 1463: /*
 1464:  * Local variables:
 1465:  * tab-width: 4
 1466:  * c-basic-offset: 4
 1467:  * End:
 1468:  * vim600: fdm=marker
 1469:  * vim: noet sw=4 ts=4
 1470:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>