Annotation of embedaddon/php/ext/mbstring/oniguruma/regposix.c, revision 1.1
1.1 ! misho 1: /**********************************************************************
! 2: regposix.c - Oniguruma (regular expression library)
! 3: **********************************************************************/
! 4: /*-
! 5: * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #define regex_t onig_regex_t
! 31: #include "regint.h"
! 32: #undef regex_t
! 33: #include "onigposix.h"
! 34:
/* View the `onig` member of a POSIX-side regex object as an onig_regex_t*. */
#define ONIG_C(reg)    ((onig_regex_t* )((reg)->onig))

/* Address of that same `onig` member, typed as onig_regex_t** so that a
 * callee (see regcomp) can store a newly created regex object into it. */
#define PONIG_C(reg)   ((onig_regex_t** )(&((reg)->onig)))
! 37:
/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Store in `len` the length in bytes of the terminated string `s`.
 *
 *   - When ONIGENC_MBC_MINLEN(enc) is 1, the string is scanned byte by byte
 *     up to the first zero byte (the strlen() equivalent).
 *   - Otherwise the work is delegated to onigenc_str_bytelen_null().
 *
 * `s` is evaluated exactly once, so an argument with side effects is safe.
 * `len` must be an assignable integer lvalue.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  UChar* enc_strlen_base_ = (UChar* )(s); \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    UChar* enc_strlen_cur_ = enc_strlen_base_; \
    while (*enc_strlen_cur_ != 0) enc_strlen_cur_++; \
    (len) = enc_strlen_cur_ - enc_strlen_base_; \
  } \
  else { \
    (len) = onigenc_str_bytelen_null(enc, enc_strlen_base_); \
  } \
} while(0)
! 49:
/* One row of the Oniguruma -> POSIX error translation table. */
typedef struct {
  int onig_err;   /* Oniguruma status code to look for */
  int posix_err;  /* POSIX-side code reported in its place */
} O2PERR;
! 54:
! 55: static int
! 56: onig2posix_error_code(int code)
! 57: {
! 58: static const O2PERR o2p[] = {
! 59: { ONIG_MISMATCH, REG_NOMATCH },
! 60: { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
! 61: { ONIGERR_MEMORY, REG_ESPACE },
! 62: { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
! 63: { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
! 64: { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
! 65: { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
! 66: { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
! 67: { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
! 68: { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
! 69: { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
! 70: { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
! 71: { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
! 72: { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
! 73: { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
! 74: { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
! 75: { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
! 76: { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
! 77: { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
! 78: { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
! 79: { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
! 80: { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
! 81: { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
! 82: { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
! 83: { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
! 84: { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
! 85: { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
! 86: { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
! 87: { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
! 88: { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
! 89: { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
! 90: { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
! 91: { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
! 92: { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
! 93: { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
! 94: { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
! 95: { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
! 96: { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
! 97: { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
! 98: { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
! 99: { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
! 100: { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
! 101: { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
! 102: { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
! 103: { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
! 104: { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
! 105: { ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
! 106: { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
! 107: { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
! 108: { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
! 109: { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
! 110: { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
! 111: { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
! 112: { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
! 113: { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
! 114: { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
! 115: { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
! 116: { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
! 117: { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
! 118:
! 119: };
! 120:
! 121: int i;
! 122:
! 123: if (code >= 0) return 0;
! 124:
! 125: for (i = 0; i < sizeof(o2p) / sizeof(o2p[0]); i++) {
! 126: if (code == o2p[i].onig_err)
! 127: return o2p[i].posix_err;
! 128: }
! 129:
! 130: return REG_EONIG_INTERNAL; /* but, unknown error code */
! 131: }
! 132:
! 133: extern int
! 134: regcomp(regex_t* reg, const char* pattern, int posix_options)
! 135: {
! 136: int r, len;
! 137: OnigSyntaxType* syntax = OnigDefaultSyntax;
! 138: OnigOptionType options;
! 139:
! 140: if ((posix_options & REG_EXTENDED) == 0)
! 141: syntax = ONIG_SYNTAX_POSIX_BASIC;
! 142:
! 143: options = syntax->options;
! 144: if ((posix_options & REG_ICASE) != 0)
! 145: ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
! 146: if ((posix_options & REG_NEWLINE) != 0) {
! 147: ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
! 148: ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
! 149: }
! 150:
! 151: reg->comp_options = posix_options;
! 152:
! 153: ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
! 154: r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
! 155: options, OnigEncDefaultCharEncoding, syntax,
! 156: (OnigErrorInfo* )NULL);
! 157: if (r != ONIG_NORMAL) {
! 158: return onig2posix_error_code(r);
! 159: }
! 160:
! 161: reg->re_nsub = ONIG_C(reg)->num_mem;
! 162: return 0;
! 163: }
! 164:
! 165: extern int
! 166: regexec(regex_t* reg, const char* str, size_t nmatch,
! 167: regmatch_t pmatch[], int posix_options)
! 168: {
! 169: int r, i, len;
! 170: UChar* end;
! 171: regmatch_t* pm;
! 172: OnigOptionType options;
! 173:
! 174: options = ONIG_OPTION_POSIX_REGION;
! 175: if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
! 176: if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
! 177:
! 178: if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
! 179: pm = (regmatch_t* )NULL;
! 180: nmatch = 0;
! 181: }
! 182: else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
! 183: pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
! 184: * (ONIG_C(reg)->num_mem + 1));
! 185: if (pm == NULL)
! 186: return REG_ESPACE;
! 187: }
! 188: else {
! 189: pm = pmatch;
! 190: }
! 191:
! 192: ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
! 193: end = (UChar* )(str + len);
! 194: r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
! 195: (OnigRegion* )pm, options);
! 196:
! 197: if (r >= 0) {
! 198: r = 0; /* Match */
! 199: if (pm != pmatch && pm != NULL) {
! 200: xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
! 201: }
! 202: }
! 203: else if (r == ONIG_MISMATCH) {
! 204: r = REG_NOMATCH;
! 205: for (i = 0; i < (int )nmatch; i++)
! 206: pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
! 207: }
! 208: else {
! 209: r = onig2posix_error_code(r);
! 210: }
! 211:
! 212: if (pm != pmatch && pm != NULL)
! 213: xfree(pm);
! 214:
! 215: #if 0
! 216: if (reg->re_nsub > nmatch - 1)
! 217: reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
! 218: #endif
! 219:
! 220: return r;
! 221: }
! 222:
! 223: extern void
! 224: regfree(regex_t* reg)
! 225: {
! 226: onig_free(ONIG_C(reg));
! 227: }
! 228:
! 229:
! 230: extern void
! 231: reg_set_encoding(int mb_code)
! 232: {
! 233: OnigEncoding enc;
! 234:
! 235: switch (mb_code) {
! 236: case REG_POSIX_ENCODING_ASCII:
! 237: enc = ONIG_ENCODING_ASCII;
! 238: break;
! 239: case REG_POSIX_ENCODING_EUC_JP:
! 240: enc = ONIG_ENCODING_EUC_JP;
! 241: break;
! 242: case REG_POSIX_ENCODING_SJIS:
! 243: enc = ONIG_ENCODING_SJIS;
! 244: break;
! 245: case REG_POSIX_ENCODING_UTF8:
! 246: enc = ONIG_ENCODING_UTF8;
! 247: break;
! 248: case REG_POSIX_ENCODING_UTF16_BE:
! 249: enc = ONIG_ENCODING_UTF16_BE;
! 250: break;
! 251: case REG_POSIX_ENCODING_UTF16_LE:
! 252: enc = ONIG_ENCODING_UTF16_LE;
! 253: break;
! 254:
! 255: default:
! 256: return ;
! 257: break;
! 258: }
! 259:
! 260: onigenc_set_default_encoding(enc);
! 261: }
! 262:
! 263: extern int
! 264: reg_name_to_group_numbers(regex_t* reg,
! 265: const unsigned char* name, const unsigned char* name_end, int** nums)
! 266: {
! 267: return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
! 268: }
! 269:
! 270: typedef struct {
! 271: int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
! 272: regex_t* reg;
! 273: void* arg;
! 274: } i_wrap;
! 275:
! 276: static int i_wrapper(const unsigned char* name, const unsigned char* name_end,
! 277: int ng, int* gs,
! 278: onig_regex_t* reg, void* arg)
! 279: {
! 280: i_wrap* warg = (i_wrap* )arg;
! 281:
! 282: return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
! 283: }
! 284:
! 285: extern int
! 286: reg_foreach_name(regex_t* reg,
! 287: int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
! 288: void* arg)
! 289: {
! 290: i_wrap warg;
! 291:
! 292: warg.func = func;
! 293: warg.reg = reg;
! 294: warg.arg = arg;
! 295:
! 296: return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
! 297: }
! 298:
! 299: extern int
! 300: reg_number_of_names(regex_t* reg)
! 301: {
! 302: return onig_number_of_names(ONIG_C(reg));
! 303: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>