Annotation of embedaddon/php/ext/mbstring/oniguruma/regposix.c, revision 1.1.1.1

1.1       misho       1: /**********************************************************************
                      2:   regposix.c - Oniguruma (regular expression library)
                      3: **********************************************************************/
                      4: /*-
                      5:  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29: 
                     30: #define regex_t   onig_regex_t
                     31: #include "regint.h"
                     32: #undef regex_t
                     33: #include "onigposix.h"
                     34: 
/* Read the `onig` member of a POSIX-layer regex_t, cast to onig_regex_t*. */
#define ONIG_C(reg)    ((onig_regex_t* )((reg)->onig))
/* Address of the same `onig` member, cast to onig_regex_t**, so that a
 * callee can store a handle into it (used with onig_new() in regcomp()). */
#define PONIG_C(reg)   ((onig_regex_t** )(&(reg)->onig))
                     37: 
                     38: /* #define ENC_STRING_LEN(enc,s,len)    len = strlen(s) */
                     39: #define ENC_STRING_LEN(enc,s,len) do { \
                     40:   if (ONIGENC_MBC_MINLEN(enc) == 1) { \
                     41:     UChar* tmps = (UChar* )(s); \
                     42:     while (*tmps != 0) tmps++; \
                     43:     len = tmps - (UChar* )(s); \
                     44:   } \
                     45:   else { \
                     46:     len = onigenc_str_bytelen_null(enc, (UChar* )s); \
                     47:   } \
                     48: } while(0)
                     49: 
/* One row of the error translation table used by onig2posix_error_code(). */
typedef struct {
  int onig_err;   /* Oniguruma result code to look for */
  int posix_err;  /* REG_* code returned for that Oniguruma code */
} O2PERR;
                     54: 
                     55: static int
                     56: onig2posix_error_code(int code)
                     57: {
                     58:   static const O2PERR o2p[] = {
                     59:     { ONIG_MISMATCH,                                      REG_NOMATCH },
                     60:     { ONIG_NO_SUPPORT_CONFIG,                             REG_EONIG_INTERNAL },
                     61:     { ONIGERR_MEMORY,                                     REG_ESPACE  },
                     62:     { ONIGERR_MATCH_STACK_LIMIT_OVER,                     REG_EONIG_INTERNAL },
                     63:     { ONIGERR_TYPE_BUG,                                   REG_EONIG_INTERNAL },
                     64:     { ONIGERR_PARSER_BUG,                                 REG_EONIG_INTERNAL },
                     65:     { ONIGERR_STACK_BUG,                                  REG_EONIG_INTERNAL },
                     66:     { ONIGERR_UNDEFINED_BYTECODE,                         REG_EONIG_INTERNAL },
                     67:     { ONIGERR_UNEXPECTED_BYTECODE,                        REG_EONIG_INTERNAL },
                     68:     { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED,             REG_EONIG_BADARG },
                     69:     { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
                     70:     { ONIGERR_INVALID_ARGUMENT,                           REG_EONIG_BADARG },
                     71:     { ONIGERR_END_PATTERN_AT_LEFT_BRACE,                  REG_EBRACE  },
                     72:     { ONIGERR_END_PATTERN_AT_LEFT_BRACKET,                REG_EBRACK  },
                     73:     { ONIGERR_EMPTY_CHAR_CLASS,                           REG_ECTYPE  },
                     74:     { ONIGERR_PREMATURE_END_OF_CHAR_CLASS,                REG_ECTYPE  },
                     75:     { ONIGERR_END_PATTERN_AT_ESCAPE,                      REG_EESCAPE },
                     76:     { ONIGERR_END_PATTERN_AT_META,                        REG_EESCAPE },
                     77:     { ONIGERR_END_PATTERN_AT_CONTROL,                     REG_EESCAPE },
                     78:     { ONIGERR_META_CODE_SYNTAX,                           REG_BADPAT  },
                     79:     { ONIGERR_CONTROL_CODE_SYNTAX,                        REG_BADPAT  },
                     80:     { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE,           REG_ECTYPE  },
                     81:     { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE,         REG_ECTYPE  },
                     82:     { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS,    REG_ECTYPE  },
                     83:     { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED,    REG_BADRPT  },
                     84:     { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID,          REG_BADRPT  },
                     85:     { ONIGERR_NESTED_REPEAT_OPERATOR,                     REG_BADRPT  },
                     86:     { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS,                REG_EPAREN  },
                     87:     { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS,     REG_EPAREN  },
                     88:     { ONIGERR_END_PATTERN_IN_GROUP,                       REG_BADPAT  },
                     89:     { ONIGERR_UNDEFINED_GROUP_OPTION,                     REG_BADPAT  },
                     90:     { ONIGERR_INVALID_POSIX_BRACKET_TYPE,                 REG_BADPAT  },
                     91:     { ONIGERR_INVALID_LOOK_BEHIND_PATTERN,                REG_BADPAT  },
                     92:     { ONIGERR_INVALID_REPEAT_RANGE_PATTERN,               REG_BADPAT  },
                     93:     { ONIGERR_TOO_BIG_NUMBER,                             REG_BADPAT  },
                     94:     { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE,            REG_BADBR   },
                     95:     { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE,   REG_BADBR   },
                     96:     { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS,                  REG_ECTYPE  },
                     97:     { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE,        REG_ECTYPE  },
                     98:     { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES,                 REG_ECTYPE  },
                     99:     { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING,                REG_BADPAT  },
                    100:     { ONIGERR_TOO_BIG_BACKREF_NUMBER,                     REG_ESUBREG },
                    101:     { ONIGERR_INVALID_BACKREF,                            REG_ESUBREG },
                    102:     { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED,       REG_BADPAT  },
                    103:     { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE,                    REG_EONIG_BADWC },
                    104:     { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE,                   REG_EONIG_BADWC },
                    105:     { ONIGERR_INVALID_WIDE_CHAR_VALUE,                    REG_EONIG_BADWC },
                    106:     { ONIGERR_EMPTY_GROUP_NAME,                           REG_BADPAT },
                    107:     { ONIGERR_INVALID_GROUP_NAME,                         REG_BADPAT },
                    108:     { ONIGERR_INVALID_CHAR_IN_GROUP_NAME,                 REG_BADPAT },
                    109:     { ONIGERR_UNDEFINED_NAME_REFERENCE,                   REG_BADPAT },
                    110:     { ONIGERR_UNDEFINED_GROUP_REFERENCE,                  REG_BADPAT },
                    111:     { ONIGERR_MULTIPLEX_DEFINED_NAME,                     REG_BADPAT },
                    112:     { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,             REG_BADPAT },
                    113:     { ONIGERR_NEVER_ENDING_RECURSION,                     REG_BADPAT },
                    114:     { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY,      REG_BADPAT },
                    115:     { ONIGERR_INVALID_CHAR_PROPERTY_NAME,                 REG_BADPAT },
                    116:     { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION,         REG_EONIG_BADARG },
                    117:     { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT,               REG_EONIG_THREAD }
                    118: 
                    119:   };
                    120: 
                    121:   int i;
                    122: 
                    123:   if (code >= 0) return 0;
                    124: 
                    125:   for (i = 0; i < sizeof(o2p) / sizeof(o2p[0]); i++) {
                    126:     if (code == o2p[i].onig_err)
                    127:       return o2p[i].posix_err;
                    128:   }
                    129: 
                    130:   return REG_EONIG_INTERNAL;  /* but, unknown error code */
                    131: }
                    132: 
                    133: extern int
                    134: regcomp(regex_t* reg, const char* pattern, int posix_options)
                    135: {
                    136:   int r, len;
                    137:   OnigSyntaxType* syntax = OnigDefaultSyntax;
                    138:   OnigOptionType options;
                    139: 
                    140:   if ((posix_options & REG_EXTENDED) == 0)
                    141:     syntax = ONIG_SYNTAX_POSIX_BASIC;
                    142: 
                    143:   options = syntax->options;
                    144:   if ((posix_options & REG_ICASE)   != 0)
                    145:     ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
                    146:   if ((posix_options & REG_NEWLINE) != 0) {
                    147:     ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
                    148:     ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
                    149:   }
                    150: 
                    151:   reg->comp_options = posix_options;
                    152: 
                    153:   ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
                    154:   r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
                    155:               options, OnigEncDefaultCharEncoding, syntax,
                    156:               (OnigErrorInfo* )NULL);
                    157:   if (r != ONIG_NORMAL) {
                    158:     return onig2posix_error_code(r);
                    159:   }
                    160: 
                    161:   reg->re_nsub = ONIG_C(reg)->num_mem;
                    162:   return 0;
                    163: }
                    164: 
                    165: extern int
                    166: regexec(regex_t* reg, const char* str, size_t nmatch,
                    167:        regmatch_t pmatch[], int posix_options)
                    168: {
                    169:   int r, i, len;
                    170:   UChar* end;
                    171:   regmatch_t* pm;
                    172:   OnigOptionType options;
                    173: 
                    174:   options = ONIG_OPTION_POSIX_REGION;
                    175:   if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
                    176:   if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
                    177: 
                    178:   if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
                    179:     pm = (regmatch_t* )NULL;
                    180:     nmatch = 0;
                    181:   }
                    182:   else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
                    183:     pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
                    184:                                * (ONIG_C(reg)->num_mem + 1));
                    185:     if (pm == NULL)
                    186:       return REG_ESPACE;
                    187:   }
                    188:   else {
                    189:     pm = pmatch;
                    190:   }
                    191: 
                    192:   ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
                    193:   end = (UChar* )(str + len);
                    194:   r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
                    195:                  (OnigRegion* )pm, options);
                    196: 
                    197:   if (r >= 0) {
                    198:     r = 0; /* Match */
                    199:     if (pm != pmatch && pm != NULL) {
                    200:       xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
                    201:     }
                    202:   }
                    203:   else if (r == ONIG_MISMATCH) {
                    204:     r = REG_NOMATCH;
                    205:     for (i = 0; i < (int )nmatch; i++)
                    206:       pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
                    207:   }
                    208:   else {
                    209:     r = onig2posix_error_code(r);
                    210:   }
                    211: 
                    212:   if (pm != pmatch && pm != NULL)
                    213:     xfree(pm);
                    214: 
                    215: #if 0
                    216:   if (reg->re_nsub > nmatch - 1)
                    217:     reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
                    218: #endif
                    219: 
                    220:   return r;
                    221: }
                    222: 
                    223: extern void
                    224: regfree(regex_t* reg)
                    225: {
                    226:   onig_free(ONIG_C(reg));
                    227: }
                    228: 
                    229: 
                    230: extern void
                    231: reg_set_encoding(int mb_code)
                    232: {
                    233:   OnigEncoding enc;
                    234: 
                    235:   switch (mb_code) {
                    236:   case REG_POSIX_ENCODING_ASCII:
                    237:     enc = ONIG_ENCODING_ASCII;
                    238:     break;
                    239:   case REG_POSIX_ENCODING_EUC_JP:
                    240:     enc = ONIG_ENCODING_EUC_JP;
                    241:     break;
                    242:   case REG_POSIX_ENCODING_SJIS:
                    243:     enc = ONIG_ENCODING_SJIS;
                    244:     break;
                    245:   case REG_POSIX_ENCODING_UTF8:
                    246:     enc = ONIG_ENCODING_UTF8;
                    247:     break;
                    248:   case REG_POSIX_ENCODING_UTF16_BE:
                    249:     enc = ONIG_ENCODING_UTF16_BE;
                    250:     break;
                    251:   case REG_POSIX_ENCODING_UTF16_LE:
                    252:     enc = ONIG_ENCODING_UTF16_LE;
                    253:     break;
                    254: 
                    255:   default:
                    256:     return ;
                    257:     break;
                    258:   }
                    259: 
                    260:   onigenc_set_default_encoding(enc);
                    261: }
                    262: 
                    263: extern int
                    264: reg_name_to_group_numbers(regex_t* reg,
                    265:   const unsigned char* name, const unsigned char* name_end, int** nums)
                    266: {
                    267:   return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
                    268: }
                    269: 
/* Context handed to i_wrapper() through the `arg` pointer of
 * onig_foreach_name(); filled in by reg_foreach_name(). */
typedef struct {
  /* caller's callback, which takes the POSIX-layer regex_t* */
  int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
  regex_t* reg;  /* regex_t passed to func in place of the onig_regex_t* */
  void* arg;     /* caller's own argument, forwarded to func */
} i_wrap;
                    275: 
                    276: static int i_wrapper(const unsigned char* name, const unsigned char* name_end,
                    277:                     int ng, int* gs,
                    278:                     onig_regex_t* reg, void* arg)
                    279: {
                    280:   i_wrap* warg = (i_wrap* )arg;
                    281: 
                    282:   return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
                    283: }
                    284: 
                    285: extern int
                    286: reg_foreach_name(regex_t* reg,
                    287:  int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
                    288:  void* arg)
                    289: {
                    290:   i_wrap warg;
                    291: 
                    292:   warg.func = func;
                    293:   warg.reg  = reg;
                    294:   warg.arg  = arg;
                    295: 
                    296:   return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
                    297: }
                    298: 
                    299: extern int
                    300: reg_number_of_names(regex_t* reg)
                    301: {
                    302:   return onig_number_of_names(ONIG_C(reg));
                    303: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>