Annotation of embedaddon/php/ext/mbstring/oniguruma/regparse.c, revision 1.1
1.1 ! misho 1: /**********************************************************************
! 2: regparse.c - Oniguruma (regular expression library)
! 3: **********************************************************************/
! 4: /*-
! 5: * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include "regparse.h"
! 31:
/* Buffer size constant; presumably for formatting warning text (its users
 * are outside this part of the file -- confirm). */
#define WARN_BUFSIZE    256
! 33:
! 34: OnigSyntaxType OnigSyntaxRuby = {
! 35: (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
! 36: ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
! 37: ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
! 38: ONIG_SYN_OP_ESC_C_CONTROL )
! 39: & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
! 40: , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
! 41: ONIG_SYN_OP2_OPTION_RUBY |
! 42: ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
! 43: ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
! 44: ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
! 45: ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
! 46: ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
! 47: ONIG_SYN_OP2_ESC_H_XDIGIT )
! 48: , ( SYN_GNU_REGEX_BV |
! 49: ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
! 50: ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
! 51: ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
! 52: ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
! 53: ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
! 54: ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
! 55: ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
! 56: , ONIG_OPTION_NONE
! 57: };
! 58:
! 59: OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
! 60:
/* Warning handler that ignores its message and does nothing. */
extern void
onig_null_warn(const char* s)
{
  (void )s;
}
! 62:
! 63: #ifdef RUBY_PLATFORM
/* Forwards the message `s` to rb_warn() as a "%s" argument, so that `s` is
 * never interpreted as a format string. */
extern void onig_rb_warn(const char* s)
{
  rb_warn("%s", s);
}
! 69:
/* Forwards the message `s` to rb_warning() as a "%s" argument, so that `s`
 * is never interpreted as a format string. */
extern void onig_rb_warning(const char* s)
{
  rb_warning("%s", s);
}
! 75: #endif
! 76:
! 77: #ifdef DEFAULT_WARN_FUNCTION
! 78: static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
! 79: #else
! 80: static OnigWarnFunc onig_warn = onig_null_warn;
! 81: #endif
! 82:
! 83: #ifdef DEFAULT_VERB_WARN_FUNCTION
! 84: static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
! 85: #else
! 86: static OnigWarnFunc onig_verb_warn = onig_null_warn;
! 87: #endif
! 88:
! 89: extern void onig_set_warn_func(OnigWarnFunc f)
! 90: {
! 91: onig_warn = f;
! 92: }
! 93:
! 94: extern void onig_set_verb_warn_func(OnigWarnFunc f)
! 95: {
! 96: onig_verb_warn = f;
! 97: }
! 98:
! 99: static void
! 100: bbuf_free(BBuf* bbuf)
! 101: {
! 102: if (IS_NOT_NULL(bbuf)) {
! 103: if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
! 104: xfree(bbuf);
! 105: }
! 106: }
! 107:
! 108: static int
! 109: bbuf_clone(BBuf** rto, BBuf* from)
! 110: {
! 111: int r;
! 112: BBuf *to;
! 113:
! 114: *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
! 115: CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
! 116: r = BBUF_INIT(to, from->alloc);
! 117: if (r != 0) return r;
! 118: to->used = from->used;
! 119: xmemcpy(to->p, from->p, from->used);
! 120: return 0;
! 121: }
! 122:
/* Clears the bits `f` in `v` when `negative` is non-zero, sets them otherwise.
 * The whole expansion is parenthesized so it stays one expression wherever
 * it is used. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
! 124:
/* First code point of the multibyte range: 0 when the encoding's minimum
 * character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Adds the range [MBCODE_START_POS(enc), all-ones code point] to `pbuf`. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, adds the full multibyte range to
 * `mbuf`.  Relies on a variable `r` in the caller's scope and returns from
 * the calling function when the add fails. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do { \
  if (! ONIGENC_IS_SINGLEBYTE(enc)) { \
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf)); \
    if (r) return r; \
  } \
} while (0)
! 137:
! 138:
/* Sets `empty` to 1 when every word of the bit set `bs` is zero, else to 0.
 * The loop counter has a macro-private name so that an argument expression
 * mentioning a caller variable called `i` is not captured by accident. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bitset_is_empty_idx_;\
  (empty) = 1;\
  for (bitset_is_empty_idx_ = 0; bitset_is_empty_idx_ < BITSET_SIZE;\
       bitset_is_empty_idx_++) {\
    if ((bs)[bitset_is_empty_idx_] != 0) {\
      (empty) = 0; break;\
    }\
  }\
} while (0)
! 148:
! 149: static void
! 150: bitset_set_range(BitSetRef bs, int from, int to)
! 151: {
! 152: int i;
! 153: for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
! 154: BITSET_SET_BIT(bs, i);
! 155: }
! 156: }
! 157:
! 158: #if 0
! 159: static void
! 160: bitset_set_all(BitSetRef bs)
! 161: {
! 162: int i;
! 163: for (i = 0; i < BITSET_SIZE; i++) {
! 164: bs[i] = ~((Bits )0);
! 165: }
! 166: }
! 167: #endif
! 168:
! 169: static void
! 170: bitset_invert(BitSetRef bs)
! 171: {
! 172: int i;
! 173: for (i = 0; i < BITSET_SIZE; i++) {
! 174: bs[i] = ~(bs[i]);
! 175: }
! 176: }
! 177:
! 178: static void
! 179: bitset_invert_to(BitSetRef from, BitSetRef to)
! 180: {
! 181: int i;
! 182: for (i = 0; i < BITSET_SIZE; i++) {
! 183: to[i] = ~(from[i]);
! 184: }
! 185: }
! 186:
! 187: static void
! 188: bitset_and(BitSetRef dest, BitSetRef bs)
! 189: {
! 190: int i;
! 191: for (i = 0; i < BITSET_SIZE; i++) {
! 192: dest[i] &= bs[i];
! 193: }
! 194: }
! 195:
! 196: static void
! 197: bitset_or(BitSetRef dest, BitSetRef bs)
! 198: {
! 199: int i;
! 200: for (i = 0; i < BITSET_SIZE; i++) {
! 201: dest[i] |= bs[i];
! 202: }
! 203: }
! 204:
! 205: static void
! 206: bitset_copy(BitSetRef dest, BitSetRef bs)
! 207: {
! 208: int i;
! 209: for (i = 0; i < BITSET_SIZE; i++) {
! 210: dest[i] = bs[i];
! 211: }
! 212: }
! 213:
! 214: extern int
! 215: onig_strncmp(const UChar* s1, const UChar* s2, int n)
! 216: {
! 217: int x;
! 218:
! 219: while (n-- > 0) {
! 220: x = *s2++ - *s1++;
! 221: if (x) return x;
! 222: }
! 223: return 0;
! 224: }
! 225:
! 226: static void
! 227: k_strcpy(UChar* dest, const UChar* src, const UChar* end)
! 228: {
! 229: int len = end - src;
! 230: if (len > 0) {
! 231: xmemcpy(dest, src, len);
! 232: dest[len] = (UChar )0;
! 233: }
! 234: }
! 235:
! 236: static UChar*
! 237: strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
! 238: {
! 239: int slen, term_len, i;
! 240: UChar *r;
! 241:
! 242: slen = end - s;
! 243: term_len = ONIGENC_MBC_MINLEN(enc);
! 244:
! 245: r = (UChar* )xmalloc(slen + term_len);
! 246: CHECK_NULL_RETURN(r);
! 247: xmemcpy(r, s, slen);
! 248:
! 249: for (i = 0; i < term_len; i++)
! 250: r[slen + i] = (UChar )0;
! 251:
! 252: return r;
! 253: }
! 254:
! 255:
/* scan pattern methods
 *
 * These macros expect the using function to have `p` (current position),
 * `end` (end of pattern) and `enc` (encoding) in scope, plus the
 * `pfetch_prev` variable declared through PFETCH_READY.
 */

/* value PPEEK yields once the end of the pattern has been reached */
#define PEND_VALUE   0

/* declares the "previous position" variable used by PINC/PFETCH/PUNFETCH */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the pattern is exhausted, 0 otherwise */
#define PEND         (p < end ?  0 : 1)
/* steps back to the position saved by the last PINC/PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skips one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* reads one character into `c`, remembering where it started */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* next character without consuming it, or PEND_VALUE at the end */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* true when the next character equals `c`; the argument is parenthesized so
 * that the cast applies to the whole argument expression */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
! 273: #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
! 274:
! 275: static UChar*
! 276: k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
! 277: int capa)
! 278: {
! 279: UChar* r;
! 280:
! 281: if (dest)
! 282: r = (UChar* )xrealloc(dest, capa + 1);
! 283: else
! 284: r = (UChar* )xmalloc(capa + 1);
! 285:
! 286: CHECK_NULL_RETURN(r);
! 287: k_strcpy(r + (dest_end - dest), src, src_end);
! 288: return r;
! 289: }
! 290:
! 291: /* dest on static area */
! 292: static UChar*
! 293: strcat_capa_from_static(UChar* dest, UChar* dest_end,
! 294: const UChar* src, const UChar* src_end, int capa)
! 295: {
! 296: UChar* r;
! 297:
! 298: r = (UChar* )xmalloc(capa + 1);
! 299: CHECK_NULL_RETURN(r);
! 300: k_strcpy(r, dest, dest_end);
! 301: k_strcpy(r + (dest_end - dest), src, src_end);
! 302: return r;
! 303: }
! 304:
! 305: #ifdef USE_NAMED_GROUP
! 306:
! 307: #define INIT_NAME_BACKREFS_ALLOC_NUM 8
! 308:
! 309: typedef struct {
! 310: UChar* name;
! 311: int name_len; /* byte length */
! 312: int back_num; /* number of backrefs */
! 313: int back_alloc;
! 314: int back_ref1;
! 315: int* back_refs;
! 316: } NameEntry;
! 317:
! 318: #ifdef USE_ST_HASH_TABLE
! 319:
! 320: #include "st.h"
! 321:
/* Hash key describing the byte range [s, end). */
typedef struct {
  unsigned char* s;    /* first byte of the key */
  unsigned char* end;  /* one past the last byte of the key */
} st_strend_key;
! 326:
! 327: static int strend_cmp(st_strend_key*, st_strend_key*);
! 328: static int strend_hash(st_strend_key*);
! 329:
! 330: static struct st_hash_type type_strend_hash = {
! 331: strend_cmp,
! 332: strend_hash,
! 333: };
! 334:
! 335: static st_table*
! 336: onig_st_init_strend_table_with_size(int size)
! 337: {
! 338: return onig_st_init_table_with_size(&type_strend_hash, size);
! 339: }
! 340:
! 341: static int
! 342: onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
! 343: {
! 344: st_strend_key key;
! 345:
! 346: key.s = (unsigned char* )str_key;
! 347: key.end = (unsigned char* )end_key;
! 348:
! 349: return onig_st_lookup(table, (st_data_t )(&key), value);
! 350: }
! 351:
! 352: static int
! 353: onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
! 354: {
! 355: st_strend_key* key;
! 356: int result;
! 357:
! 358: key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
! 359: key->s = (unsigned char* )str_key;
! 360: key->end = (unsigned char* )end_key;
! 361: result = onig_st_insert(table, (st_data_t )key, value);
! 362: if (result) {
! 363: xfree(key);
! 364: }
! 365: return result;
! 366: }
! 367:
! 368: static int
! 369: strend_cmp(st_strend_key* x, st_strend_key* y)
! 370: {
! 371: unsigned char *p, *q;
! 372: int c;
! 373:
! 374: if ((x->end - x->s) != (y->end - y->s))
! 375: return 1;
! 376:
! 377: p = x->s;
! 378: q = y->s;
! 379: while (p < x->end) {
! 380: c = (int )*p - (int )*q;
! 381: if (c != 0) return c;
! 382:
! 383: p++; q++;
! 384: }
! 385:
! 386: return 0;
! 387: }
! 388:
! 389: static int
! 390: strend_hash(st_strend_key* x)
! 391: {
! 392: int val;
! 393: unsigned char *p;
! 394:
! 395: val = 0;
! 396: p = x->s;
! 397: while (p < x->end) {
! 398: val = val * 997 + (int )*p++;
! 399: }
! 400:
! 401: return val + (val >> 5);
! 402: }
! 403:
! 404: typedef st_table NameTable;
! 405: typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
! 406:
! 407: #define NAMEBUF_SIZE 24
! 408: #define NAMEBUF_SIZE_1 25
! 409:
! 410: #ifdef ONIG_DEBUG
! 411: static int
! 412: i_print_name_entry(UChar* key, NameEntry* e, void* arg)
! 413: {
! 414: int i;
! 415: FILE* fp = (FILE* )arg;
! 416:
! 417: fprintf(fp, "%s: ", e->name);
! 418: if (e->back_num == 0)
! 419: fputs("-", fp);
! 420: else if (e->back_num == 1)
! 421: fprintf(fp, "%d", e->back_ref1);
! 422: else {
! 423: for (i = 0; i < e->back_num; i++) {
! 424: if (i > 0) fprintf(fp, ", ");
! 425: fprintf(fp, "%d", e->back_refs[i]);
! 426: }
! 427: }
! 428: fputs("\n", fp);
! 429: return ST_CONTINUE;
! 430: }
! 431:
! 432: extern int
! 433: onig_print_names(FILE* fp, regex_t* reg)
! 434: {
! 435: NameTable* t = (NameTable* )reg->name_table;
! 436:
! 437: if (IS_NOT_NULL(t)) {
! 438: fprintf(fp, "name table\n");
! 439: onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
! 440: fputs("\n", fp);
! 441: }
! 442: return 0;
! 443: }
! 444: #endif
! 445:
! 446: static int
! 447: i_free_name_entry(UChar* key, NameEntry* e, void* arg)
! 448: {
! 449: xfree(e->name);
! 450: if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
! 451: xfree(key);
! 452: xfree(e);
! 453: return ST_DELETE;
! 454: }
! 455:
! 456: static int
! 457: names_clear(regex_t* reg)
! 458: {
! 459: NameTable* t = (NameTable* )reg->name_table;
! 460:
! 461: if (IS_NOT_NULL(t)) {
! 462: onig_st_foreach(t, i_free_name_entry, 0);
! 463: }
! 464: return 0;
! 465: }
! 466:
! 467: extern int
! 468: onig_names_free(regex_t* reg)
! 469: {
! 470: int r;
! 471: NameTable* t;
! 472:
! 473: r = names_clear(reg);
! 474: if (r) return r;
! 475:
! 476: t = (NameTable* )reg->name_table;
! 477: if (IS_NOT_NULL(t)) onig_st_free_table(t);
! 478: reg->name_table = (void* )NULL;
! 479: return 0;
! 480: }
! 481:
! 482: static NameEntry*
! 483: name_find(regex_t* reg, const UChar* name, const UChar* name_end)
! 484: {
! 485: NameEntry* e;
! 486: NameTable* t = (NameTable* )reg->name_table;
! 487:
! 488: e = (NameEntry* )NULL;
! 489: if (IS_NOT_NULL(t)) {
! 490: onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
! 491: }
! 492: return e;
! 493: }
! 494:
! 495: typedef struct {
! 496: int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
! 497: regex_t* reg;
! 498: void* arg;
! 499: int ret;
! 500: OnigEncoding enc;
! 501: } INamesArg;
! 502:
! 503: static int
! 504: i_names(UChar* key, NameEntry* e, INamesArg* arg)
! 505: {
! 506: int r = (*(arg->func))(e->name,
! 507: /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
! 508: e->name + e->name_len,
! 509: e->back_num,
! 510: (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
! 511: arg->reg, arg->arg);
! 512: if (r != 0) {
! 513: arg->ret = r;
! 514: return ST_STOP;
! 515: }
! 516: return ST_CONTINUE;
! 517: }
! 518:
! 519: extern int
! 520: onig_foreach_name(regex_t* reg,
! 521: int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
! 522: void* arg)
! 523: {
! 524: INamesArg narg;
! 525: NameTable* t = (NameTable* )reg->name_table;
! 526:
! 527: narg.ret = 0;
! 528: if (IS_NOT_NULL(t)) {
! 529: narg.func = func;
! 530: narg.reg = reg;
! 531: narg.arg = arg;
! 532: narg.enc = reg->enc; /* should be pattern encoding. */
! 533: onig_st_foreach(t, i_names, (HashDataType )&narg);
! 534: }
! 535: return narg.ret;
! 536: }
! 537:
! 538: static int
! 539: i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
! 540: {
! 541: int i;
! 542:
! 543: if (e->back_num > 1) {
! 544: for (i = 0; i < e->back_num; i++) {
! 545: e->back_refs[i] = map[e->back_refs[i]].new_val;
! 546: }
! 547: }
! 548: else if (e->back_num == 1) {
! 549: e->back_ref1 = map[e->back_ref1].new_val;
! 550: }
! 551:
! 552: return ST_CONTINUE;
! 553: }
! 554:
! 555: extern int
! 556: onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
! 557: {
! 558: NameTable* t = (NameTable* )reg->name_table;
! 559:
! 560: if (IS_NOT_NULL(t)) {
! 561: onig_st_foreach(t, i_renumber_name, (HashDataType )map);
! 562: }
! 563: return 0;
! 564: }
! 565:
! 566:
! 567: extern int
! 568: onig_number_of_names(regex_t* reg)
! 569: {
! 570: NameTable* t = (NameTable* )reg->name_table;
! 571:
! 572: if (IS_NOT_NULL(t))
! 573: return t->num_entries;
! 574: else
! 575: return 0;
! 576: }
! 577:
! 578: #else /* USE_ST_HASH_TABLE */
! 579:
! 580: #define INIT_NAMES_ALLOC_NUM 8
! 581:
! 582: typedef struct {
! 583: NameEntry* e;
! 584: int num;
! 585: int alloc;
! 586: } NameTable;
! 587:
! 588:
! 589: #ifdef ONIG_DEBUG
! 590: extern int
! 591: onig_print_names(FILE* fp, regex_t* reg)
! 592: {
! 593: int i, j;
! 594: NameEntry* e;
! 595: NameTable* t = (NameTable* )reg->name_table;
! 596:
! 597: if (IS_NOT_NULL(t) && t->num > 0) {
! 598: fprintf(fp, "name table\n");
! 599: for (i = 0; i < t->num; i++) {
! 600: e = &(t->e[i]);
! 601: fprintf(fp, "%s: ", e->name);
! 602: if (e->back_num == 0) {
! 603: fputs("-", fp);
! 604: }
! 605: else if (e->back_num == 1) {
! 606: fprintf(fp, "%d", e->back_ref1);
! 607: }
! 608: else {
! 609: for (j = 0; j < e->back_num; j++) {
! 610: if (j > 0) fprintf(fp, ", ");
! 611: fprintf(fp, "%d", e->back_refs[j]);
! 612: }
! 613: }
! 614: fputs("\n", fp);
! 615: }
! 616: fputs("\n", fp);
! 617: }
! 618: return 0;
! 619: }
! 620: #endif
! 621:
! 622: static int
! 623: names_clear(regex_t* reg)
! 624: {
! 625: int i;
! 626: NameEntry* e;
! 627: NameTable* t = (NameTable* )reg->name_table;
! 628:
! 629: if (IS_NOT_NULL(t)) {
! 630: for (i = 0; i < t->num; i++) {
! 631: e = &(t->e[i]);
! 632: if (IS_NOT_NULL(e->name)) {
! 633: xfree(e->name);
! 634: e->name = NULL;
! 635: e->name_len = 0;
! 636: e->back_num = 0;
! 637: e->back_alloc = 0;
! 638: if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
! 639: e->back_refs = (int* )NULL;
! 640: }
! 641: }
! 642: if (IS_NOT_NULL(t->e)) {
! 643: xfree(t->e);
! 644: t->e = NULL;
! 645: }
! 646: t->num = 0;
! 647: }
! 648: return 0;
! 649: }
! 650:
! 651: extern int
! 652: onig_names_free(regex_t* reg)
! 653: {
! 654: int r;
! 655: NameTable* t;
! 656:
! 657: r = names_clear(reg);
! 658: if (r) return r;
! 659:
! 660: t = (NameTable* )reg->name_table;
! 661: if (IS_NOT_NULL(t)) xfree(t);
! 662: reg->name_table = NULL;
! 663: return 0;
! 664: }
! 665:
! 666: static NameEntry*
! 667: name_find(regex_t* reg, UChar* name, UChar* name_end)
! 668: {
! 669: int i, len;
! 670: NameEntry* e;
! 671: NameTable* t = (NameTable* )reg->name_table;
! 672:
! 673: if (IS_NOT_NULL(t)) {
! 674: len = name_end - name;
! 675: for (i = 0; i < t->num; i++) {
! 676: e = &(t->e[i]);
! 677: if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
! 678: return e;
! 679: }
! 680: }
! 681: return (NameEntry* )NULL;
! 682: }
! 683:
! 684: extern int
! 685: onig_foreach_name(regex_t* reg,
! 686: int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
! 687: void* arg)
! 688: {
! 689: int i, r;
! 690: NameEntry* e;
! 691: NameTable* t = (NameTable* )reg->name_table;
! 692:
! 693: if (IS_NOT_NULL(t)) {
! 694: for (i = 0; i < t->num; i++) {
! 695: e = &(t->e[i]);
! 696: r = (*func)(e->name, e->name + e->name_len, e->back_num,
! 697: (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
! 698: reg, arg);
! 699: if (r != 0) return r;
! 700: }
! 701: }
! 702: return 0;
! 703: }
! 704:
! 705: extern int
! 706: onig_number_of_names(regex_t* reg)
! 707: {
! 708: NameTable* t = (NameTable* )reg->name_table;
! 709:
! 710: if (IS_NOT_NULL(t))
! 711: return t->num;
! 712: else
! 713: return 0;
! 714: }
! 715:
! 716: #endif /* else USE_ST_HASH_TABLE */
! 717:
! 718: static int
! 719: name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
! 720: {
! 721: int alloc;
! 722: NameEntry* e;
! 723: NameTable* t = (NameTable* )reg->name_table;
! 724:
! 725: if (name_end - name <= 0)
! 726: return ONIGERR_EMPTY_GROUP_NAME;
! 727:
! 728: e = name_find(reg, name, name_end);
! 729: if (IS_NULL(e)) {
! 730: #ifdef USE_ST_HASH_TABLE
! 731: if (IS_NULL(t)) {
! 732: t = onig_st_init_strend_table_with_size(5);
! 733: reg->name_table = (void* )t;
! 734: }
! 735: e = (NameEntry* )xmalloc(sizeof(NameEntry));
! 736: CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
! 737:
! 738: e->name = strdup_with_null(reg->enc, name, name_end);
! 739: if (IS_NULL(e->name)) return ONIGERR_MEMORY;
! 740: onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
! 741: (HashDataType )e);
! 742:
! 743: e->name_len = name_end - name;
! 744: e->back_num = 0;
! 745: e->back_alloc = 0;
! 746: e->back_refs = (int* )NULL;
! 747:
! 748: #else
! 749:
! 750: if (IS_NULL(t)) {
! 751: alloc = INIT_NAMES_ALLOC_NUM;
! 752: t = (NameTable* )xmalloc(sizeof(NameTable));
! 753: CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
! 754: t->e = NULL;
! 755: t->alloc = 0;
! 756: t->num = 0;
! 757:
! 758: t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
! 759: if (IS_NULL(t->e)) {
! 760: xfree(t);
! 761: return ONIGERR_MEMORY;
! 762: }
! 763: t->alloc = alloc;
! 764: reg->name_table = t;
! 765: goto clear;
! 766: }
! 767: else if (t->num == t->alloc) {
! 768: int i;
! 769:
! 770: alloc = t->alloc * 2;
! 771: t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
! 772: CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY);
! 773: t->alloc = alloc;
! 774:
! 775: clear:
! 776: for (i = t->num; i < t->alloc; i++) {
! 777: t->e[i].name = NULL;
! 778: t->e[i].name_len = 0;
! 779: t->e[i].back_num = 0;
! 780: t->e[i].back_alloc = 0;
! 781: t->e[i].back_refs = (int* )NULL;
! 782: }
! 783: }
! 784: e = &(t->e[t->num]);
! 785: t->num++;
! 786: e->name = strdup_with_null(reg->enc, name, name_end);
! 787: e->name_len = name_end - name;
! 788: #endif
! 789: }
! 790:
! 791: if (e->back_num >= 1 &&
! 792: ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
! 793: onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
! 794: name, name_end);
! 795: return ONIGERR_MULTIPLEX_DEFINED_NAME;
! 796: }
! 797:
! 798: e->back_num++;
! 799: if (e->back_num == 1) {
! 800: e->back_ref1 = backref;
! 801: }
! 802: else {
! 803: if (e->back_num == 2) {
! 804: alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
! 805: e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
! 806: CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
! 807: e->back_alloc = alloc;
! 808: e->back_refs[0] = e->back_ref1;
! 809: e->back_refs[1] = backref;
! 810: }
! 811: else {
! 812: if (e->back_num > e->back_alloc) {
! 813: alloc = e->back_alloc * 2;
! 814: e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
! 815: CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
! 816: e->back_alloc = alloc;
! 817: }
! 818: e->back_refs[e->back_num - 1] = backref;
! 819: }
! 820: }
! 821:
! 822: return 0;
! 823: }
! 824:
! 825: extern int
! 826: onig_name_to_group_numbers(regex_t* reg, const UChar* name,
! 827: const UChar* name_end, int** nums)
! 828: {
! 829: NameEntry* e;
! 830:
! 831: e = name_find(reg, name, name_end);
! 832: if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
! 833:
! 834: switch (e->back_num) {
! 835: case 0:
! 836: break;
! 837: case 1:
! 838: *nums = &(e->back_ref1);
! 839: break;
! 840: default:
! 841: *nums = e->back_refs;
! 842: break;
! 843: }
! 844: return e->back_num;
! 845: }
! 846:
! 847: extern int
! 848: onig_name_to_backref_number(regex_t* reg, const UChar* name,
! 849: const UChar* name_end, OnigRegion *region)
! 850: {
! 851: int i, n, *nums;
! 852:
! 853: n = onig_name_to_group_numbers(reg, name, name_end, &nums);
! 854: if (n < 0)
! 855: return n;
! 856: else if (n == 0)
! 857: return ONIGERR_PARSER_BUG;
! 858: else if (n == 1)
! 859: return nums[0];
! 860: else {
! 861: if (IS_NOT_NULL(region)) {
! 862: for (i = n - 1; i >= 0; i--) {
! 863: if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
! 864: return nums[i];
! 865: }
! 866: }
! 867: return nums[n - 1];
! 868: }
! 869: }
! 870:
! 871: #else /* USE_NAMED_GROUP */
! 872:
! 873: extern int
! 874: onig_name_to_group_numbers(regex_t* reg, const UChar* name,
! 875: const UChar* name_end, int** nums)
! 876: {
! 877: return ONIG_NO_SUPPORT_CONFIG;
! 878: }
! 879:
! 880: extern int
! 881: onig_name_to_backref_number(regex_t* reg, const UChar* name,
! 882: const UChar* name_end, OnigRegion* region)
! 883: {
! 884: return ONIG_NO_SUPPORT_CONFIG;
! 885: }
! 886:
! 887: extern int
! 888: onig_foreach_name(regex_t* reg,
! 889: int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
! 890: void* arg)
! 891: {
! 892: return ONIG_NO_SUPPORT_CONFIG;
! 893: }
! 894:
! 895: extern int
! 896: onig_number_of_names(regex_t* reg)
! 897: {
! 898: return 0;
! 899: }
! 900: #endif /* else USE_NAMED_GROUP */
! 901:
! 902: extern int
! 903: onig_noname_group_capture_is_active(regex_t* reg)
! 904: {
! 905: if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
! 906: return 0;
! 907:
! 908: #ifdef USE_NAMED_GROUP
! 909: if (onig_number_of_names(reg) > 0 &&
! 910: IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
! 911: !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
! 912: return 0;
! 913: }
! 914: #endif
! 915:
! 916: return 1;
! 917: }
! 918:
! 919:
/* number of slots in the first heap-allocated group-node array */
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
! 921:
! 922: static void
! 923: scan_env_clear(ScanEnv* env)
! 924: {
! 925: int i;
! 926:
! 927: BIT_STATUS_CLEAR(env->capture_history);
! 928: BIT_STATUS_CLEAR(env->bt_mem_start);
! 929: BIT_STATUS_CLEAR(env->bt_mem_end);
! 930: BIT_STATUS_CLEAR(env->backrefed_mem);
! 931: env->error = (UChar* )NULL;
! 932: env->error_end = (UChar* )NULL;
! 933: env->num_call = 0;
! 934: env->num_mem = 0;
! 935: #ifdef USE_NAMED_GROUP
! 936: env->num_named = 0;
! 937: #endif
! 938: env->mem_alloc = 0;
! 939: env->mem_nodes_dynamic = (Node** )NULL;
! 940:
! 941: for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
! 942: env->mem_nodes_static[i] = NULL_NODE;
! 943:
! 944: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 945: env->num_comb_exp_check = 0;
! 946: env->comb_exp_max_regnum = 0;
! 947: env->curr_max_regnum = 0;
! 948: env->has_recursion = 0;
! 949: #endif
! 950: }
! 951:
! 952: static int
! 953: scan_env_add_mem_entry(ScanEnv* env)
! 954: {
! 955: int i, need, alloc;
! 956: Node** p;
! 957:
! 958: need = env->num_mem + 1;
! 959: if (need >= SCANENV_MEMNODES_SIZE) {
! 960: if (env->mem_alloc <= need) {
! 961: if (IS_NULL(env->mem_nodes_dynamic)) {
! 962: alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
! 963: p = (Node** )xmalloc(sizeof(Node*) * alloc);
! 964: xmemcpy(p, env->mem_nodes_static,
! 965: sizeof(Node*) * SCANENV_MEMNODES_SIZE);
! 966: }
! 967: else {
! 968: alloc = env->mem_alloc * 2;
! 969: p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
! 970: }
! 971: CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
! 972:
! 973: for (i = env->num_mem + 1; i < alloc; i++)
! 974: p[i] = NULL_NODE;
! 975:
! 976: env->mem_nodes_dynamic = p;
! 977: env->mem_alloc = alloc;
! 978: }
! 979: }
! 980:
! 981: env->num_mem++;
! 982: return env->num_mem;
! 983: }
! 984:
! 985: static int
! 986: scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
! 987: {
! 988: if (env->num_mem >= num)
! 989: SCANENV_MEM_NODES(env)[num] = node;
! 990: else
! 991: return ONIGERR_PARSER_BUG;
! 992: return 0;
! 993: }
! 994:
! 995:
! 996: #ifdef USE_RECYCLE_NODE
! 997: typedef struct _FreeNode {
! 998: struct _FreeNode* next;
! 999: } FreeNode;
! 1000:
! 1001: static FreeNode* FreeNodeList = (FreeNode* )NULL;
! 1002: #endif
! 1003:
! 1004: extern void
! 1005: onig_node_free(Node* node)
! 1006: {
! 1007: start:
! 1008: if (IS_NULL(node)) return ;
! 1009:
! 1010: switch (NTYPE(node)) {
! 1011: case N_STRING:
! 1012: if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
! 1013: xfree(NSTRING(node).s);
! 1014: }
! 1015: break;
! 1016:
! 1017: case N_LIST:
! 1018: case N_ALT:
! 1019: onig_node_free(NCONS(node).left);
! 1020: /* onig_node_free(NCONS(node).right); */
! 1021: {
! 1022: Node* next_node = NCONS(node).right;
! 1023:
! 1024: #ifdef USE_RECYCLE_NODE
! 1025: {
! 1026: FreeNode* n = (FreeNode* )node;
! 1027:
! 1028: THREAD_ATOMIC_START;
! 1029: n->next = FreeNodeList;
! 1030: FreeNodeList = n;
! 1031: THREAD_ATOMIC_END;
! 1032: }
! 1033: #else
! 1034: xfree(node);
! 1035: #endif
! 1036:
! 1037: node = next_node;
! 1038: goto start;
! 1039: }
! 1040: break;
! 1041:
! 1042: case N_CCLASS:
! 1043: {
! 1044: CClassNode* cc = &(NCCLASS(node));
! 1045:
! 1046: if (IS_CCLASS_SHARE(cc))
! 1047: return ;
! 1048:
! 1049: if (cc->mbuf)
! 1050: bbuf_free(cc->mbuf);
! 1051: }
! 1052: break;
! 1053:
! 1054: case N_QUANTIFIER:
! 1055: if (NQUANTIFIER(node).target)
! 1056: onig_node_free(NQUANTIFIER(node).target);
! 1057: break;
! 1058:
! 1059: case N_EFFECT:
! 1060: if (NEFFECT(node).target)
! 1061: onig_node_free(NEFFECT(node).target);
! 1062: break;
! 1063:
! 1064: case N_BACKREF:
! 1065: if (IS_NOT_NULL(NBACKREF(node).back_dynamic))
! 1066: xfree(NBACKREF(node).back_dynamic);
! 1067: break;
! 1068:
! 1069: case N_ANCHOR:
! 1070: if (NANCHOR(node).target)
! 1071: onig_node_free(NANCHOR(node).target);
! 1072: break;
! 1073: }
! 1074:
! 1075: #ifdef USE_RECYCLE_NODE
! 1076: {
! 1077: FreeNode* n = (FreeNode* )node;
! 1078:
! 1079: THREAD_ATOMIC_START;
! 1080: n->next = FreeNodeList;
! 1081: FreeNodeList = n;
! 1082: THREAD_ATOMIC_END;
! 1083: }
! 1084: #else
! 1085: xfree(node);
! 1086: #endif
! 1087: }
! 1088:
! 1089: #ifdef USE_RECYCLE_NODE
! 1090: extern int
! 1091: onig_free_node_list(void)
! 1092: {
! 1093: FreeNode* n;
! 1094:
! 1095: /* THREAD_ATOMIC_START; */
! 1096: while (IS_NOT_NULL(FreeNodeList)) {
! 1097: n = FreeNodeList;
! 1098: FreeNodeList = FreeNodeList->next;
! 1099: xfree(n);
! 1100: }
! 1101: /* THREAD_ATOMIC_END; */
! 1102: return 0;
! 1103: }
! 1104: #endif
! 1105:
! 1106: static Node*
! 1107: node_new(void)
! 1108: {
! 1109: Node* node;
! 1110:
! 1111: #ifdef USE_RECYCLE_NODE
! 1112: THREAD_ATOMIC_START;
! 1113: if (IS_NOT_NULL(FreeNodeList)) {
! 1114: node = (Node* )FreeNodeList;
! 1115: FreeNodeList = FreeNodeList->next;
! 1116: THREAD_ATOMIC_END;
! 1117: return node;
! 1118: }
! 1119: THREAD_ATOMIC_END;
! 1120: #endif
! 1121:
! 1122: node = (Node* )xmalloc(sizeof(Node));
! 1123: return node;
! 1124: }
! 1125:
! 1126:
! 1127: static void
! 1128: initialize_cclass(CClassNode* cc)
! 1129: {
! 1130: BITSET_CLEAR(cc->bs);
! 1131: cc->flags = 0;
! 1132: cc->mbuf = NULL;
! 1133: }
! 1134:
! 1135: static Node*
! 1136: node_new_cclass(void)
! 1137: {
! 1138: Node* node = node_new();
! 1139: CHECK_NULL_RETURN(node);
! 1140: node->type = N_CCLASS;
! 1141:
! 1142: initialize_cclass(&(NCCLASS(node)));
! 1143: return node;
! 1144: }
! 1145:
! 1146: static Node*
! 1147: node_new_cclass_by_codepoint_range(int not,
! 1148: const OnigCodePoint sbr[], const OnigCodePoint mbr[])
! 1149: {
! 1150: CClassNode* cc;
! 1151: int n, i, j;
! 1152:
! 1153: Node* node = node_new();
! 1154: CHECK_NULL_RETURN(node);
! 1155: node->type = N_CCLASS;
! 1156:
! 1157: cc = &(NCCLASS(node));
! 1158: cc->flags = 0;
! 1159: if (not != 0) CCLASS_SET_NOT(cc);
! 1160:
! 1161: BITSET_CLEAR(cc->bs);
! 1162: if (IS_NOT_NULL(sbr)) {
! 1163: n = ONIGENC_CODE_RANGE_NUM(sbr);
! 1164: for (i = 0; i < n; i++) {
! 1165: for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
! 1166: j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
! 1167: BITSET_SET_BIT(cc->bs, j);
! 1168: }
! 1169: }
! 1170: }
! 1171:
! 1172: if (IS_NULL(mbr)) {
! 1173: is_null:
! 1174: cc->mbuf = NULL;
! 1175: }
! 1176: else {
! 1177: BBuf* bbuf;
! 1178:
! 1179: n = ONIGENC_CODE_RANGE_NUM(mbr);
! 1180: if (n == 0) goto is_null;
! 1181:
! 1182: bbuf = (BBuf* )xmalloc(sizeof(BBuf));
! 1183: CHECK_NULL_RETURN_VAL(bbuf, NULL);
! 1184: bbuf->alloc = n + 1;
! 1185: bbuf->used = n + 1;
! 1186: bbuf->p = (UChar* )((void* )mbr);
! 1187:
! 1188: cc->mbuf = bbuf;
! 1189: }
! 1190:
! 1191: return node;
! 1192: }
! 1193:
! 1194: static Node*
! 1195: node_new_ctype(int type)
! 1196: {
! 1197: Node* node = node_new();
! 1198: CHECK_NULL_RETURN(node);
! 1199: node->type = N_CTYPE;
! 1200: NCTYPE(node).type = type;
! 1201: return node;
! 1202: }
! 1203:
! 1204: static Node*
! 1205: node_new_anychar(void)
! 1206: {
! 1207: Node* node = node_new();
! 1208: CHECK_NULL_RETURN(node);
! 1209: node->type = N_ANYCHAR;
! 1210: return node;
! 1211: }
! 1212:
! 1213: static Node*
! 1214: node_new_list(Node* left, Node* right)
! 1215: {
! 1216: Node* node = node_new();
! 1217: CHECK_NULL_RETURN(node);
! 1218: node->type = N_LIST;
! 1219: NCONS(node).left = left;
! 1220: NCONS(node).right = right;
! 1221: return node;
! 1222: }
! 1223:
! 1224: extern Node*
! 1225: onig_node_new_list(Node* left, Node* right)
! 1226: {
! 1227: return node_new_list(left, right);
! 1228: }
! 1229:
! 1230: static Node*
! 1231: node_new_alt(Node* left, Node* right)
! 1232: {
! 1233: Node* node = node_new();
! 1234: CHECK_NULL_RETURN(node);
! 1235: node->type = N_ALT;
! 1236: NCONS(node).left = left;
! 1237: NCONS(node).right = right;
! 1238: return node;
! 1239: }
! 1240:
! 1241: extern Node*
! 1242: onig_node_new_anchor(int type)
! 1243: {
! 1244: Node* node = node_new();
! 1245: CHECK_NULL_RETURN(node);
! 1246: node->type = N_ANCHOR;
! 1247: NANCHOR(node).type = type;
! 1248: NANCHOR(node).target = NULL;
! 1249: NANCHOR(node).char_len = -1;
! 1250: return node;
! 1251: }
! 1252:
! 1253: static Node*
! 1254: node_new_backref(int back_num, int* backrefs, int by_name,
! 1255: #ifdef USE_BACKREF_AT_LEVEL
! 1256: int exist_level, int nest_level,
! 1257: #endif
! 1258: ScanEnv* env)
! 1259: {
! 1260: int i;
! 1261: Node* node = node_new();
! 1262:
! 1263: CHECK_NULL_RETURN(node);
! 1264: node->type = N_BACKREF;
! 1265: NBACKREF(node).state = 0;
! 1266: NBACKREF(node).back_num = back_num;
! 1267: NBACKREF(node).back_dynamic = (int* )NULL;
! 1268: if (by_name != 0)
! 1269: NBACKREF(node).state |= NST_NAME_REF;
! 1270:
! 1271: #ifdef USE_BACKREF_AT_LEVEL
! 1272: if (exist_level != 0) {
! 1273: NBACKREF(node).state |= NST_NEST_LEVEL;
! 1274: NBACKREF(node).nest_level = nest_level;
! 1275: }
! 1276: #endif
! 1277:
! 1278: for (i = 0; i < back_num; i++) {
! 1279: if (backrefs[i] <= env->num_mem &&
! 1280: IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
! 1281: NBACKREF(node).state |= NST_RECURSION; /* /...(\1).../ */
! 1282: break;
! 1283: }
! 1284: }
! 1285:
! 1286: if (back_num <= NODE_BACKREFS_SIZE) {
! 1287: for (i = 0; i < back_num; i++)
! 1288: NBACKREF(node).back_static[i] = backrefs[i];
! 1289: }
! 1290: else {
! 1291: int* p = (int* )xmalloc(sizeof(int) * back_num);
! 1292: if (IS_NULL(p)) {
! 1293: onig_node_free(node);
! 1294: return NULL;
! 1295: }
! 1296: NBACKREF(node).back_dynamic = p;
! 1297: for (i = 0; i < back_num; i++)
! 1298: p[i] = backrefs[i];
! 1299: }
! 1300: return node;
! 1301: }
! 1302:
#ifdef USE_SUBEXP_CALL
/*
 * Allocate an N_CALL node for the name delimited by [name, name_end).
 * The pointers are stored as given (the name is not copied); the target
 * is left unresolved and ref_num is CALLNODE_REFNUM_UNDEF.
 * Returns NULL on allocation failure.
 */
static Node*
node_new_call(UChar* name, UChar* name_end)
{
  Node* call;

  call = node_new();
  CHECK_NULL_RETURN(call);

  call->type = N_CALL;
  NCALL(call).name     = name;
  NCALL(call).name_end = name_end;
  NCALL(call).target   = NULL_NODE;
  NCALL(call).ref_num  = CALLNODE_REFNUM_UNDEF;
  NCALL(call).state    = 0;
  return call;
}
#endif
! 1319:
! 1320: static Node*
! 1321: node_new_quantifier(int lower, int upper, int by_number)
! 1322: {
! 1323: Node* node = node_new();
! 1324: CHECK_NULL_RETURN(node);
! 1325: node->type = N_QUANTIFIER;
! 1326: NQUANTIFIER(node).state = 0;
! 1327: NQUANTIFIER(node).target = NULL;
! 1328: NQUANTIFIER(node).lower = lower;
! 1329: NQUANTIFIER(node).upper = upper;
! 1330: NQUANTIFIER(node).greedy = 1;
! 1331: NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
! 1332: NQUANTIFIER(node).head_exact = NULL_NODE;
! 1333: NQUANTIFIER(node).next_head_exact = NULL_NODE;
! 1334: NQUANTIFIER(node).is_refered = 0;
! 1335: if (by_number != 0)
! 1336: NQUANTIFIER(node).state |= NST_BY_NUMBER;
! 1337:
! 1338: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 1339: NQUANTIFIER(node).comb_exp_check_num = 0;
! 1340: #endif
! 1341:
! 1342: return node;
! 1343: }
! 1344:
! 1345: static Node*
! 1346: node_new_effect(int type)
! 1347: {
! 1348: Node* node = node_new();
! 1349: CHECK_NULL_RETURN(node);
! 1350: node->type = N_EFFECT;
! 1351: NEFFECT(node).type = type;
! 1352: NEFFECT(node).state = 0;
! 1353: NEFFECT(node).regnum = 0;
! 1354: NEFFECT(node).option = 0;
! 1355: NEFFECT(node).target = NULL;
! 1356: NEFFECT(node).call_addr = -1;
! 1357: NEFFECT(node).opt_count = 0;
! 1358: return node;
! 1359: }
! 1360:
! 1361: extern Node*
! 1362: onig_node_new_effect(int type)
! 1363: {
! 1364: return node_new_effect(type);
! 1365: }
! 1366:
! 1367: static Node*
! 1368: node_new_effect_memory(OnigOptionType option, int is_named)
! 1369: {
! 1370: Node* node = node_new_effect(EFFECT_MEMORY);
! 1371: CHECK_NULL_RETURN(node);
! 1372: if (is_named != 0)
! 1373: SET_EFFECT_STATUS(node, NST_NAMED_GROUP);
! 1374:
! 1375: #ifdef USE_SUBEXP_CALL
! 1376: NEFFECT(node).option = option;
! 1377: #endif
! 1378: return node;
! 1379: }
! 1380:
! 1381: static Node*
! 1382: node_new_option(OnigOptionType option)
! 1383: {
! 1384: Node* node = node_new_effect(EFFECT_OPTION);
! 1385: CHECK_NULL_RETURN(node);
! 1386: NEFFECT(node).option = option;
! 1387: return node;
! 1388: }
! 1389:
! 1390: extern int
! 1391: onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
! 1392: {
! 1393: int addlen = end - s;
! 1394:
! 1395: if (addlen > 0) {
! 1396: int len = NSTRING(node).end - NSTRING(node).s;
! 1397:
! 1398: if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
! 1399: UChar* p;
! 1400: int capa = len + addlen + NODE_STR_MARGIN;
! 1401:
! 1402: if (capa <= NSTRING(node).capa) {
! 1403: k_strcpy(NSTRING(node).s + len, s, end);
! 1404: }
! 1405: else {
! 1406: if (NSTRING(node).s == NSTRING(node).buf)
! 1407: p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
! 1408: s, end, capa);
! 1409: else
! 1410: p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
! 1411:
! 1412: CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
! 1413: NSTRING(node).s = p;
! 1414: NSTRING(node).capa = capa;
! 1415: }
! 1416: }
! 1417: else {
! 1418: k_strcpy(NSTRING(node).s + len, s, end);
! 1419: }
! 1420: NSTRING(node).end = NSTRING(node).s + len + addlen;
! 1421: }
! 1422:
! 1423: return 0;
! 1424: }
! 1425:
! 1426: static int
! 1427: node_str_cat_char(Node* node, UChar c)
! 1428: {
! 1429: UChar s[1];
! 1430:
! 1431: s[0] = c;
! 1432: return onig_node_str_cat(node, s, s + 1);
! 1433: }
! 1434:
! 1435: extern void
! 1436: onig_node_conv_to_str_node(Node* node, int flag)
! 1437: {
! 1438: node->type = N_STRING;
! 1439:
! 1440: NSTRING(node).flag = flag;
! 1441: NSTRING(node).capa = 0;
! 1442: NSTRING(node).s = NSTRING(node).buf;
! 1443: NSTRING(node).end = NSTRING(node).buf;
! 1444: }
! 1445:
! 1446: extern void
! 1447: onig_node_str_clear(Node* node)
! 1448: {
! 1449: if (NSTRING(node).capa != 0 &&
! 1450: IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
! 1451: xfree(NSTRING(node).s);
! 1452: }
! 1453:
! 1454: NSTRING(node).capa = 0;
! 1455: NSTRING(node).flag = 0;
! 1456: NSTRING(node).s = NSTRING(node).buf;
! 1457: NSTRING(node).end = NSTRING(node).buf;
! 1458: }
! 1459:
! 1460: static Node*
! 1461: node_new_str(const UChar* s, const UChar* end)
! 1462: {
! 1463: Node* node = node_new();
! 1464: CHECK_NULL_RETURN(node);
! 1465:
! 1466: node->type = N_STRING;
! 1467: NSTRING(node).capa = 0;
! 1468: NSTRING(node).flag = 0;
! 1469: NSTRING(node).s = NSTRING(node).buf;
! 1470: NSTRING(node).end = NSTRING(node).buf;
! 1471: if (onig_node_str_cat(node, s, end)) {
! 1472: onig_node_free(node);
! 1473: return NULL;
! 1474: }
! 1475: return node;
! 1476: }
! 1477:
! 1478: extern Node*
! 1479: onig_node_new_str(const UChar* s, const UChar* end)
! 1480: {
! 1481: return node_new_str(s, end);
! 1482: }
! 1483:
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
/*
 * Allocate a string node for [s, end) and mark it as raw.
 * Returns NULL when node_new_str() fails; the raw flag is only set on a
 * successfully created node (previously a NULL node was dereferenced).
 */
static Node*
node_new_str_raw(UChar* s, UChar* end)
{
  Node* node = node_new_str(s, end);
  CHECK_NULL_RETURN(node);
  NSTRING_SET_RAW(node);
  return node;
}
#endif
! 1493:
! 1494: static Node*
! 1495: node_new_empty(void)
! 1496: {
! 1497: return node_new_str(NULL, NULL);
! 1498: }
! 1499:
! 1500: static Node*
! 1501: node_new_str_char(UChar c)
! 1502: {
! 1503: UChar p[1];
! 1504:
! 1505: p[0] = c;
! 1506: return node_new_str(p, p + 1);
! 1507: }
! 1508:
! 1509: static Node*
! 1510: str_node_split_last_char(StrNode* sn, OnigEncoding enc)
! 1511: {
! 1512: const UChar *p;
! 1513: Node* n = NULL_NODE;
! 1514:
! 1515: if (sn->end > sn->s) {
! 1516: p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
! 1517: if (p && p > sn->s) { /* can be splitted. */
! 1518: n = node_new_str(p, sn->end);
! 1519: if ((sn->flag & NSTR_RAW) != 0)
! 1520: NSTRING_SET_RAW(n);
! 1521: sn->end = (UChar* )p;
! 1522: }
! 1523: }
! 1524: return n;
! 1525: }
! 1526:
! 1527: static int
! 1528: str_node_can_be_split(StrNode* sn, OnigEncoding enc)
! 1529: {
! 1530: if (sn->end > sn->s) {
! 1531: return ((enc_len(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
! 1532: }
! 1533: return 0;
! 1534: }
! 1535:
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the string num bytes to the right and fill the first num bytes
 * with val.  Always returns 0; the function previously fell off the end
 * without returning a value although it is declared to return int.
 *
 * NOTE(review): the copy goes through a NODE_STR_BUF_SIZE scratch buffer
 * and the string grows by num bytes in place; callers presumably
 * guarantee that both the old length and the new length fit - confirm.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = sn->end - sn->s;
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  return 0;
}
#endif
! 1553:
! 1554: extern int
! 1555: onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
! 1556: {
! 1557: unsigned int num, val;
! 1558: OnigCodePoint c;
! 1559: UChar* p = *src;
! 1560: PFETCH_READY;
! 1561:
! 1562: num = 0;
! 1563: while (!PEND) {
! 1564: PFETCH(c);
! 1565: if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
! 1566: val = (unsigned int )DIGITVAL(c);
! 1567: if ((INT_MAX_LIMIT - val) / 10UL < num)
! 1568: return -1; /* overflow */
! 1569:
! 1570: num = num * 10 + val;
! 1571: }
! 1572: else {
! 1573: PUNFETCH;
! 1574: break;
! 1575: }
! 1576: }
! 1577: *src = p;
! 1578: return num;
! 1579: }
! 1580:
! 1581: static int
! 1582: scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
! 1583: OnigEncoding enc)
! 1584: {
! 1585: OnigCodePoint c;
! 1586: unsigned int num, val;
! 1587: UChar* p = *src;
! 1588: PFETCH_READY;
! 1589:
! 1590: num = 0;
! 1591: while (!PEND && maxlen-- != 0) {
! 1592: PFETCH(c);
! 1593: if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
! 1594: val = (unsigned int )XDIGITVAL(enc,c);
! 1595: if ((INT_MAX_LIMIT - val) / 16UL < num)
! 1596: return -1; /* overflow */
! 1597:
! 1598: num = (num << 4) + XDIGITVAL(enc,c);
! 1599: }
! 1600: else {
! 1601: PUNFETCH;
! 1602: break;
! 1603: }
! 1604: }
! 1605: *src = p;
! 1606: return num;
! 1607: }
! 1608:
! 1609: static int
! 1610: scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
! 1611: OnigEncoding enc)
! 1612: {
! 1613: OnigCodePoint c;
! 1614: unsigned int num, val;
! 1615: UChar* p = *src;
! 1616: PFETCH_READY;
! 1617:
! 1618: num = 0;
! 1619: while (!PEND && maxlen-- != 0) {
! 1620: PFETCH(c);
! 1621: if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
! 1622: val = ODIGITVAL(c);
! 1623: if ((INT_MAX_LIMIT - val) / 8UL < num)
! 1624: return -1; /* overflow */
! 1625:
! 1626: num = (num << 3) + val;
! 1627: }
! 1628: else {
! 1629: PUNFETCH;
! 1630: break;
! 1631: }
! 1632: }
! 1633: *src = p;
! 1634: return num;
! 1635: }
! 1636:
! 1637:
! 1638: #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
! 1639: BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
! 1640:
! 1641: /* data format:
! 1642: [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
! 1643: (all data size is OnigCodePoint)
! 1644: */
! 1645: static int
! 1646: new_code_range(BBuf** pbuf)
! 1647: {
! 1648: #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
! 1649: int r;
! 1650: OnigCodePoint n;
! 1651: BBuf* bbuf;
! 1652:
! 1653: bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
! 1654: CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
! 1655: r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
! 1656: if (r) return r;
! 1657:
! 1658: n = 0;
! 1659: BBUF_WRITE_CODE_POINT(bbuf, 0, n);
! 1660: return 0;
! 1661: }
! 1662:
! 1663: static int
! 1664: add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
! 1665: {
! 1666: int r, inc_n, pos;
! 1667: int low, high, bound, x;
! 1668: OnigCodePoint n, *data;
! 1669: BBuf* bbuf;
! 1670:
! 1671: if (from > to) {
! 1672: n = from; from = to; to = n;
! 1673: }
! 1674:
! 1675: if (IS_NULL(*pbuf)) {
! 1676: r = new_code_range(pbuf);
! 1677: if (r) return r;
! 1678: bbuf = *pbuf;
! 1679: n = 0;
! 1680: }
! 1681: else {
! 1682: bbuf = *pbuf;
! 1683: GET_CODE_POINT(n, bbuf->p);
! 1684: }
! 1685: data = (OnigCodePoint* )(bbuf->p);
! 1686: data++;
! 1687:
! 1688: for (low = 0, bound = n; low < bound; ) {
! 1689: x = (low + bound) >> 1;
! 1690: if (from > data[x*2 + 1])
! 1691: low = x + 1;
! 1692: else
! 1693: bound = x;
! 1694: }
! 1695:
! 1696: for (high = low, bound = n; high < bound; ) {
! 1697: x = (high + bound) >> 1;
! 1698: if (to >= data[x*2] - 1)
! 1699: high = x + 1;
! 1700: else
! 1701: bound = x;
! 1702: }
! 1703:
! 1704: inc_n = low + 1 - high;
! 1705: if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
! 1706: return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
! 1707:
! 1708: if (inc_n != 1) {
! 1709: if (from > data[low*2])
! 1710: from = data[low*2];
! 1711: if (to < data[(high - 1)*2 + 1])
! 1712: to = data[(high - 1)*2 + 1];
! 1713: }
! 1714:
! 1715: if (inc_n != 0 && (OnigCodePoint )high < n) {
! 1716: int from_pos = SIZE_CODE_POINT * (1 + high * 2);
! 1717: int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
! 1718: int size = (n - high) * 2 * SIZE_CODE_POINT;
! 1719:
! 1720: if (inc_n > 0) {
! 1721: BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
! 1722: }
! 1723: else {
! 1724: BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
! 1725: }
! 1726: }
! 1727:
! 1728: pos = SIZE_CODE_POINT * (1 + low * 2);
! 1729: BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
! 1730: BBUF_WRITE_CODE_POINT(bbuf, pos, from);
! 1731: BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
! 1732: n += inc_n;
! 1733: BBUF_WRITE_CODE_POINT(bbuf, 0, n);
! 1734:
! 1735: return 0;
! 1736: }
! 1737:
! 1738: static int
! 1739: add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
! 1740: {
! 1741: if (from > to) {
! 1742: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
! 1743: return 0;
! 1744: else
! 1745: return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
! 1746: }
! 1747:
! 1748: return add_code_range_to_buf(pbuf, from, to);
! 1749: }
! 1750:
! 1751: static int
! 1752: not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
! 1753: {
! 1754: int r, i, n;
! 1755: OnigCodePoint pre, from, *data, to = 0;
! 1756:
! 1757: *pbuf = (BBuf* )NULL;
! 1758: if (IS_NULL(bbuf)) {
! 1759: set_all:
! 1760: return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
! 1761: }
! 1762:
! 1763: data = (OnigCodePoint* )(bbuf->p);
! 1764: GET_CODE_POINT(n, data);
! 1765: data++;
! 1766: if (n <= 0) goto set_all;
! 1767:
! 1768: r = 0;
! 1769: pre = MBCODE_START_POS(enc);
! 1770: for (i = 0; i < n; i++) {
! 1771: from = data[i*2];
! 1772: to = data[i*2+1];
! 1773: if (pre <= from - 1) {
! 1774: r = add_code_range_to_buf(pbuf, pre, from - 1);
! 1775: if (r != 0) return r;
! 1776: }
! 1777: if (to == ~((OnigCodePoint )0)) break;
! 1778: pre = to + 1;
! 1779: }
! 1780: if (to < ~((OnigCodePoint )0)) {
! 1781: r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
! 1782: }
! 1783: return r;
! 1784: }
! 1785:
! 1786: #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
! 1787: BBuf *tbuf; \
! 1788: int tnot; \
! 1789: tnot = not1; not1 = not2; not2 = tnot; \
! 1790: tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
! 1791: } while (0)
! 1792:
! 1793: static int
! 1794: or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
! 1795: BBuf* bbuf2, int not2, BBuf** pbuf)
! 1796: {
! 1797: int r;
! 1798: OnigCodePoint i, n1, *data1;
! 1799: OnigCodePoint from, to;
! 1800:
! 1801: *pbuf = (BBuf* )NULL;
! 1802: if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
! 1803: if (not1 != 0 || not2 != 0)
! 1804: return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
! 1805: return 0;
! 1806: }
! 1807:
! 1808: r = 0;
! 1809: if (IS_NULL(bbuf2))
! 1810: SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
! 1811:
! 1812: if (IS_NULL(bbuf1)) {
! 1813: if (not1 != 0) {
! 1814: return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
! 1815: }
! 1816: else {
! 1817: if (not2 == 0) {
! 1818: return bbuf_clone(pbuf, bbuf2);
! 1819: }
! 1820: else {
! 1821: return not_code_range_buf(enc, bbuf2, pbuf);
! 1822: }
! 1823: }
! 1824: }
! 1825:
! 1826: if (not1 != 0)
! 1827: SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
! 1828:
! 1829: data1 = (OnigCodePoint* )(bbuf1->p);
! 1830: GET_CODE_POINT(n1, data1);
! 1831: data1++;
! 1832:
! 1833: if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
! 1834: r = bbuf_clone(pbuf, bbuf2);
! 1835: }
! 1836: else if (not1 == 0) { /* 1 OR (not 2) */
! 1837: r = not_code_range_buf(enc, bbuf2, pbuf);
! 1838: }
! 1839: if (r != 0) return r;
! 1840:
! 1841: for (i = 0; i < n1; i++) {
! 1842: from = data1[i*2];
! 1843: to = data1[i*2+1];
! 1844: r = add_code_range_to_buf(pbuf, from, to);
! 1845: if (r != 0) return r;
! 1846: }
! 1847: return 0;
! 1848: }
! 1849:
/*
 * Walk the n ranges stored pairwise in data (data[2*i] = start,
 * data[2*i+1] = end) and add to *pbuf the pieces of [from1, to1] that
 * the walk leaves uncovered.  The caller in this file uses it for the
 * "1 AND (not 2)" case.
 *
 * Returns 0 on success or the non-zero result of add_code_range_to_buf().
 *
 * NOTE(review): when a data range starts after to1, from1 is moved to
 * from2, which makes from1 > to1 and drops the remaining [from1, to1]
 * instead of adding it - verify that this is intended.
 * NOTE(review): "to2 + 1" wraps to 0 when to2 is the largest
 * OnigCodePoint value - confirm this cannot happen here.
 */
! 1850: static int
! 1851: and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
! 1852: OnigCodePoint* data, int n)
! 1853: {
! 1854: int i, r;
! 1855: OnigCodePoint from2, to2;
! 1856:
! 1857: for (i = 0; i < n; i++) {
! 1858: from2 = data[i*2];
! 1859: to2 = data[i*2+1];
/* data range starts before the current piece */
! 1860: if (from2 < from1) {
! 1861: if (to2 < from1) continue;
! 1862: else {
/* it overlaps the head of the piece: cut the head off */
! 1863: from1 = to2 + 1;
! 1864: }
! 1865: }
/* data range starts inside the current piece */
! 1866: else if (from2 <= to1) {
! 1867: if (to2 < to1) {
/* it ends inside as well: emit the part before it, continue after it */
! 1868: if (from1 <= from2 - 1) {
! 1869: r = add_code_range_to_buf(pbuf, from1, from2-1);
! 1870: if (r != 0) return r;
! 1871: }
! 1872: from1 = to2 + 1;
! 1873: }
! 1874: else {
/* it covers the tail of the piece: cut the tail off */
! 1875: to1 = from2 - 1;
! 1876: }
! 1877: }
! 1878: else {
! 1879: from1 = from2;
! 1880: }
! 1881: if (from1 > to1) break;
! 1882: }
/* whatever is left of the piece is added as the final range */
! 1883: if (from1 <= to1) {
! 1884: r = add_code_range_to_buf(pbuf, from1, to1);
! 1885: if (r != 0) return r;
! 1886: }
! 1887: return 0;
! 1888: }
! 1889:
! 1890: static int
! 1891: and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
! 1892: {
! 1893: int r;
! 1894: OnigCodePoint i, j, n1, n2, *data1, *data2;
! 1895: OnigCodePoint from, to, from1, to1, from2, to2;
! 1896:
! 1897: *pbuf = (BBuf* )NULL;
! 1898: if (IS_NULL(bbuf1)) {
! 1899: if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
! 1900: return bbuf_clone(pbuf, bbuf2);
! 1901: return 0;
! 1902: }
! 1903: else if (IS_NULL(bbuf2)) {
! 1904: if (not2 != 0)
! 1905: return bbuf_clone(pbuf, bbuf1);
! 1906: return 0;
! 1907: }
! 1908:
! 1909: if (not1 != 0)
! 1910: SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
! 1911:
! 1912: data1 = (OnigCodePoint* )(bbuf1->p);
! 1913: data2 = (OnigCodePoint* )(bbuf2->p);
! 1914: GET_CODE_POINT(n1, data1);
! 1915: GET_CODE_POINT(n2, data2);
! 1916: data1++;
! 1917: data2++;
! 1918:
! 1919: if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
! 1920: for (i = 0; i < n1; i++) {
! 1921: from1 = data1[i*2];
! 1922: to1 = data1[i*2+1];
! 1923: for (j = 0; j < n2; j++) {
! 1924: from2 = data2[j*2];
! 1925: to2 = data2[j*2+1];
! 1926: if (from2 > to1) break;
! 1927: if (to2 < from1) continue;
! 1928: from = MAX(from1, from2);
! 1929: to = MIN(to1, to2);
! 1930: r = add_code_range_to_buf(pbuf, from, to);
! 1931: if (r != 0) return r;
! 1932: }
! 1933: }
! 1934: }
! 1935: else if (not1 == 0) { /* 1 AND (not 2) */
! 1936: for (i = 0; i < n1; i++) {
! 1937: from1 = data1[i*2];
! 1938: to1 = data1[i*2+1];
! 1939: r = and_code_range1(pbuf, from1, to1, data2, n2);
! 1940: if (r != 0) return r;
! 1941: }
! 1942: }
! 1943:
! 1944: return 0;
! 1945: }
! 1946:
! 1947: static int
! 1948: and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
! 1949: {
! 1950: int r, not1, not2;
! 1951: BBuf *buf1, *buf2, *pbuf;
! 1952: BitSetRef bsr1, bsr2;
! 1953: BitSet bs1, bs2;
! 1954:
! 1955: not1 = IS_CCLASS_NOT(dest);
! 1956: bsr1 = dest->bs;
! 1957: buf1 = dest->mbuf;
! 1958: not2 = IS_CCLASS_NOT(cc);
! 1959: bsr2 = cc->bs;
! 1960: buf2 = cc->mbuf;
! 1961:
! 1962: if (not1 != 0) {
! 1963: bitset_invert_to(bsr1, bs1);
! 1964: bsr1 = bs1;
! 1965: }
! 1966: if (not2 != 0) {
! 1967: bitset_invert_to(bsr2, bs2);
! 1968: bsr2 = bs2;
! 1969: }
! 1970: bitset_and(bsr1, bsr2);
! 1971: if (bsr1 != dest->bs) {
! 1972: bitset_copy(dest->bs, bsr1);
! 1973: bsr1 = dest->bs;
! 1974: }
! 1975: if (not1 != 0) {
! 1976: bitset_invert(dest->bs);
! 1977: }
! 1978:
! 1979: if (! ONIGENC_IS_SINGLEBYTE(enc)) {
! 1980: if (not1 != 0 && not2 != 0) {
! 1981: r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
! 1982: }
! 1983: else {
! 1984: r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
! 1985: if (r == 0 && not1 != 0) {
! 1986: BBuf *tbuf;
! 1987: r = not_code_range_buf(enc, pbuf, &tbuf);
! 1988: if (r != 0) {
! 1989: bbuf_free(pbuf);
! 1990: return r;
! 1991: }
! 1992: bbuf_free(pbuf);
! 1993: pbuf = tbuf;
! 1994: }
! 1995: }
! 1996: if (r != 0) return r;
! 1997:
! 1998: dest->mbuf = pbuf;
! 1999: bbuf_free(buf1);
! 2000: return r;
! 2001: }
! 2002: return 0;
! 2003: }
! 2004:
! 2005: static int
! 2006: or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
! 2007: {
! 2008: int r, not1, not2;
! 2009: BBuf *buf1, *buf2, *pbuf;
! 2010: BitSetRef bsr1, bsr2;
! 2011: BitSet bs1, bs2;
! 2012:
! 2013: not1 = IS_CCLASS_NOT(dest);
! 2014: bsr1 = dest->bs;
! 2015: buf1 = dest->mbuf;
! 2016: not2 = IS_CCLASS_NOT(cc);
! 2017: bsr2 = cc->bs;
! 2018: buf2 = cc->mbuf;
! 2019:
! 2020: if (not1 != 0) {
! 2021: bitset_invert_to(bsr1, bs1);
! 2022: bsr1 = bs1;
! 2023: }
! 2024: if (not2 != 0) {
! 2025: bitset_invert_to(bsr2, bs2);
! 2026: bsr2 = bs2;
! 2027: }
! 2028: bitset_or(bsr1, bsr2);
! 2029: if (bsr1 != dest->bs) {
! 2030: bitset_copy(dest->bs, bsr1);
! 2031: bsr1 = dest->bs;
! 2032: }
! 2033: if (not1 != 0) {
! 2034: bitset_invert(dest->bs);
! 2035: }
! 2036:
! 2037: if (! ONIGENC_IS_SINGLEBYTE(enc)) {
! 2038: if (not1 != 0 && not2 != 0) {
! 2039: r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
! 2040: }
! 2041: else {
! 2042: r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
! 2043: if (r == 0 && not1 != 0) {
! 2044: BBuf *tbuf;
! 2045: r = not_code_range_buf(enc, pbuf, &tbuf);
! 2046: if (r != 0) {
! 2047: bbuf_free(pbuf);
! 2048: return r;
! 2049: }
! 2050: bbuf_free(pbuf);
! 2051: pbuf = tbuf;
! 2052: }
! 2053: }
! 2054: if (r != 0) return r;
! 2055:
! 2056: dest->mbuf = pbuf;
! 2057: bbuf_free(buf1);
! 2058: return r;
! 2059: }
! 2060: else
! 2061: return 0;
! 2062: }
! 2063:
! 2064: static int
! 2065: conv_backslash_value(int c, ScanEnv* env)
! 2066: {
! 2067: if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
! 2068: switch (c) {
! 2069: case 'n': return '\n';
! 2070: case 't': return '\t';
! 2071: case 'r': return '\r';
! 2072: case 'f': return '\f';
! 2073: case 'a': return '\007';
! 2074: case 'b': return '\010';
! 2075: case 'e': return '\033';
! 2076: case 'v':
! 2077: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
! 2078: return '\v';
! 2079: break;
! 2080:
! 2081: default:
! 2082: break;
! 2083: }
! 2084: }
! 2085: return c;
! 2086: }
! 2087:
! 2088: static int
! 2089: is_invalid_quantifier_target(Node* node)
! 2090: {
! 2091: switch (NTYPE(node)) {
! 2092: case N_ANCHOR:
! 2093: return 1;
! 2094: break;
! 2095:
! 2096: case N_EFFECT:
! 2097: if (NEFFECT(node).type == EFFECT_OPTION)
! 2098: return is_invalid_quantifier_target(NEFFECT(node).target);
! 2099: break;
! 2100:
! 2101: case N_LIST: /* ex. (?:\G\A)* */
! 2102: do {
! 2103: if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
! 2104: } while (IS_NOT_NULL(node = NCONS(node).right));
! 2105: return 0;
! 2106: break;
! 2107:
! 2108: case N_ALT: /* ex. (?:abc|\A)* */
! 2109: do {
! 2110: if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
! 2111: } while (IS_NOT_NULL(node = NCONS(node).right));
! 2112: break;
! 2113:
! 2114: default:
! 2115: break;
! 2116: }
! 2117: return 0;
! 2118: }
! 2119:
! 2120: /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
! 2121: static int
! 2122: popular_quantifier_num(QuantifierNode* qf)
! 2123: {
! 2124: if (qf->greedy) {
! 2125: if (qf->lower == 0) {
! 2126: if (qf->upper == 1) return 0;
! 2127: else if (IS_REPEAT_INFINITE(qf->upper)) return 1;
! 2128: }
! 2129: else if (qf->lower == 1) {
! 2130: if (IS_REPEAT_INFINITE(qf->upper)) return 2;
! 2131: }
! 2132: }
! 2133: else {
! 2134: if (qf->lower == 0) {
! 2135: if (qf->upper == 1) return 3;
! 2136: else if (IS_REPEAT_INFINITE(qf->upper)) return 4;
! 2137: }
! 2138: else if (qf->lower == 1) {
! 2139: if (IS_REPEAT_INFINITE(qf->upper)) return 5;
! 2140: }
! 2141: }
! 2142: return -1;
! 2143: }
! 2144:
! 2145:
! 2146: enum ReduceType {
! 2147: RQ_ASIS = 0, /* as is */
! 2148: RQ_DEL = 1, /* delete parent */
! 2149: RQ_A, /* to '*' */
! 2150: RQ_AQ, /* to '*?' */
! 2151: RQ_QQ, /* to '??' */
! 2152: RQ_P_QQ, /* to '+)??' */
! 2153: RQ_PQ_Q /* to '+?)?' */
! 2154: };
! 2155:
! 2156: static enum ReduceType ReduceTypeTable[6][6] = {
! 2157: {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
! 2158: {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
! 2159: {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
! 2160: {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
! 2161: {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
! 2162: {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
! 2163: };
! 2164:
! 2165: extern void
! 2166: onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
! 2167: {
! 2168: int pnum, cnum;
! 2169: QuantifierNode *p, *c;
! 2170:
! 2171: p = &(NQUANTIFIER(pnode));
! 2172: c = &(NQUANTIFIER(cnode));
! 2173: pnum = popular_quantifier_num(p);
! 2174: cnum = popular_quantifier_num(c);
! 2175:
! 2176: switch(ReduceTypeTable[cnum][pnum]) {
! 2177: case RQ_DEL:
! 2178: *p = *c;
! 2179: break;
! 2180: case RQ_A:
! 2181: p->target = c->target;
! 2182: p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
! 2183: break;
! 2184: case RQ_AQ:
! 2185: p->target = c->target;
! 2186: p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
! 2187: break;
! 2188: case RQ_QQ:
! 2189: p->target = c->target;
! 2190: p->lower = 0; p->upper = 1; p->greedy = 0;
! 2191: break;
! 2192: case RQ_P_QQ:
! 2193: p->target = cnode;
! 2194: p->lower = 0; p->upper = 1; p->greedy = 0;
! 2195: c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
! 2196: return ;
! 2197: break;
! 2198: case RQ_PQ_Q:
! 2199: p->target = cnode;
! 2200: p->lower = 0; p->upper = 1; p->greedy = 1;
! 2201: c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
! 2202: return ;
! 2203: break;
! 2204: case RQ_ASIS:
! 2205: p->target = cnode;
! 2206: return ;
! 2207: break;
! 2208: }
! 2209:
! 2210: c->target = NULL_NODE;
! 2211: onig_node_free(cnode);
! 2212: }
! 2213:
! 2214:
/*
 * Token type tags stored in OnigToken.type (for example
 * fetch_range_quantifier() produces TK_INTERVAL).  The entries after the
 * "in cc" marker are, per that marker, the ones used inside a character
 * class.
 */
! 2215: enum TokenSyms {
! 2216: TK_EOT = 0, /* end of token */
! 2217: TK_RAW_BYTE = 1,
! 2218: TK_CHAR,
! 2219: TK_STRING,
! 2220: TK_CODE_POINT,
! 2221: TK_ANYCHAR,
! 2222: TK_CHAR_TYPE,
! 2223: TK_BACKREF,
! 2224: TK_CALL,
! 2225: TK_ANCHOR,
! 2226: TK_OP_REPEAT,
! 2227: TK_INTERVAL,
! 2228: TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
! 2229: TK_ALT,
! 2230: TK_SUBEXP_OPEN,
! 2231: TK_SUBEXP_CLOSE,
! 2232: TK_CC_OPEN,
! 2233: TK_QUOTE_OPEN,
! 2234: TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
! 2235: /* in cc */
! 2236: TK_CC_CLOSE,
! 2237: TK_CC_RANGE,
! 2238: TK_POSIX_BRACKET_OPEN,
! 2239: TK_CC_AND, /* && */
! 2240: TK_CC_CC_OPEN /* [ */
! 2241: };
! 2242:
/*
 * One token of the pattern: a type tag plus a union holding the payload
 * for that type.  Which union member is valid presumably follows from
 * the type (e.g. u.repeat for TK_INTERVAL, as filled in by
 * fetch_range_quantifier()) - confirm against the tokenizer for the
 * other members.
 */
! 2243: typedef struct {
! 2244: enum TokenSyms type;
! 2245: int escaped;
! 2246: int base; /* is number: 8, 16 (used in [....]) */
! 2247: UChar* backp;
! 2248: union {
! 2249: UChar* s;
! 2250: int c;
! 2251: OnigCodePoint code;
! 2252: int anchor;
! 2253: int subtype;
/* repeat bounds and flags of a quantifier token */
! 2254: struct {
! 2255: int lower;
! 2256: int upper;
! 2257: int greedy;
! 2258: int possessive;
! 2259: } repeat;
/* back reference payload; the same values node_new_backref() accepts */
! 2260: struct {
! 2261: int num;
! 2262: int ref1;
! 2263: int* refs;
! 2264: int by_name;
! 2265: #ifdef USE_BACKREF_AT_LEVEL
! 2266: int exist_level;
! 2267: int level; /* \k<name+n> */
! 2268: #endif
! 2269: } backref;
/* name span of a call token, delimited by [name, name_end) */
! 2270: struct {
! 2271: UChar* name;
! 2272: UChar* name_end;
! 2273: } call;
! 2274: struct {
! 2275: int not;
! 2276: } prop;
! 2277: } u;
! 2278: } OnigToken;
! 2279:
! 2280:
! 2281: static int
! 2282: fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
! 2283: {
! 2284: int low, up, syn_allow, non_low = 0;
! 2285: int r = 0;
! 2286: OnigCodePoint c;
! 2287: OnigEncoding enc = env->enc;
! 2288: UChar* p = *src;
! 2289: PFETCH_READY;
! 2290:
! 2291: syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
! 2292:
! 2293: if (PEND) {
! 2294: if (syn_allow)
! 2295: return 1; /* "....{" : OK! */
! 2296: else
! 2297: return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
! 2298: }
! 2299:
! 2300: if (! syn_allow) {
! 2301: c = PPEEK;
! 2302: if (c == ')' || c == '(' || c == '|') {
! 2303: return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
! 2304: }
! 2305: }
! 2306:
! 2307: low = onig_scan_unsigned_number(&p, end, env->enc);
! 2308: if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
! 2309: if (low > ONIG_MAX_REPEAT_NUM)
! 2310: return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
! 2311:
! 2312: if (p == *src) { /* can't read low */
! 2313: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
! 2314: /* allow {,n} as {0,n} */
! 2315: low = 0;
! 2316: non_low = 1;
! 2317: }
! 2318: else
! 2319: goto invalid;
! 2320: }
! 2321:
! 2322: if (PEND) goto invalid;
! 2323: PFETCH(c);
! 2324: if (c == ',') {
! 2325: UChar* prev = p;
! 2326: up = onig_scan_unsigned_number(&p, end, env->enc);
! 2327: if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
! 2328: if (up > ONIG_MAX_REPEAT_NUM)
! 2329: return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
! 2330:
! 2331: if (p == prev) {
! 2332: if (non_low != 0)
! 2333: goto invalid;
! 2334: up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
! 2335: }
! 2336: }
! 2337: else {
! 2338: if (non_low != 0)
! 2339: goto invalid;
! 2340:
! 2341: PUNFETCH;
! 2342: up = low; /* {n} : exact n times */
! 2343: r = 2; /* fixed */
! 2344: }
! 2345:
! 2346: if (PEND) goto invalid;
! 2347: PFETCH(c);
! 2348: if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
! 2349: if (c != MC_ESC(enc)) goto invalid;
! 2350: PFETCH(c);
! 2351: }
! 2352: if (c != '}') goto invalid;
! 2353:
! 2354: if (!IS_REPEAT_INFINITE(up) && low > up) {
! 2355: return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
! 2356: }
! 2357:
! 2358: tok->type = TK_INTERVAL;
! 2359: tok->u.repeat.lower = low;
! 2360: tok->u.repeat.upper = up;
! 2361: *src = p;
! 2362: return r; /* 0: normal {n,m}, 2: fixed {n} */
! 2363:
! 2364: invalid:
! 2365: if (syn_allow)
! 2366: return 1; /* OK */
! 2367: else
! 2368: return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
! 2369: }
! 2370:
/*
 * Read the character(s) following an escape character at *src and
 * return the resulting code value.
 *
 *   M-x         (if the syntax enables it)  ->  (x & 0xff) | 0x80
 *   C-x, c x    (if the syntax enables them) ->  x & 0x9f, or 0177 for '?'
 *   anything else                            ->  conv_backslash_value()
 *
 * In the M-/C-/c forms x may itself be an escape sequence, which is
 * handled by a recursive call.
 *
 * Returns the value on success, after advancing *src past what was
 * consumed.  On failure an ONIGERR_* code is returned (the recursive
 * call's result is treated as an error when negative) and *src is left
 * unchanged.
 */
! 2371: /* \M-, \C-, \c, or \... */
! 2372: static int
! 2373: fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
! 2374: {
! 2375: int v;
! 2376: OnigCodePoint c;
! 2377: OnigEncoding enc = env->enc;
! 2378: UChar* p = *src;
! 2379: PFETCH_READY;
! 2380:
! 2381: if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
! 2382:
! 2383: PFETCH(c);
! 2384: switch (c) {
! 2385: case 'M':
! 2386: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
! 2387: if (PEND) return ONIGERR_END_PATTERN_AT_META;
! 2388: PFETCH(c);
! 2389: if (c != '-') return ONIGERR_META_CODE_SYNTAX;
! 2390: if (PEND) return ONIGERR_END_PATTERN_AT_META;
! 2391: PFETCH(c);
! 2392: if (c == MC_ESC(enc)) {
! 2393: v = fetch_escaped_value(&p, end, env);
! 2394: if (v < 0) return v;
! 2395: c = (OnigCodePoint )v;
! 2396: }
/* keep the low byte and set its top bit */
! 2397: c = ((c & 0xff) | 0x80);
! 2398: }
! 2399: else
! 2400: goto backslash;
! 2401: break;
! 2402:
! 2403: case 'C':
! 2404: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
! 2405: if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
! 2406: PFETCH(c);
! 2407: if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
/* the rest is shared with the 'c' form below */
! 2408: goto control;
! 2409: }
! 2410: else
! 2411: goto backslash;
! 2412:
! 2413: case 'c':
! 2414: if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
! 2415: control:
! 2416: if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
! 2417: PFETCH(c);
! 2418: if (c == '?') {
! 2419: c = 0177;
! 2420: }
! 2421: else {
! 2422: if (c == MC_ESC(enc)) {
! 2423: v = fetch_escaped_value(&p, end, env);
! 2424: if (v < 0) return v;
! 2425: c = (OnigCodePoint )v;
! 2426: }
! 2427: c &= 0x9f;
! 2428: }
! 2429: break;
! 2430: }
! 2431: /* fall through */
! 2432:
! 2433: default:
! 2434: {
! 2435: backslash:
! 2436: c = conv_backslash_value(c, env);
! 2437: }
! 2438: break;
! 2439: }
! 2440:
/* commit the consumed input only on success */
! 2441: *src = p;
! 2442: return c;
! 2443: }
! 2444:
! 2445: static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
! 2446:
! 2447: #ifdef USE_NAMED_GROUP
! 2448: #ifdef USE_BACKREF_AT_LEVEL
! 2449: /*
! 2450: \k<name+n>, \k<name-n>
! 2451: */
! 2452: static int
! 2453: fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
! 2454: , ScanEnv* env, int* level)
! 2455: {
! 2456: int r, exist_level = 0;
! 2457: OnigCodePoint c = 0;
! 2458: OnigCodePoint first_code;
! 2459: OnigEncoding enc = env->enc;
! 2460: UChar *name_end;
! 2461: UChar *p = *src;
! 2462: PFETCH_READY;
! 2463:
! 2464: name_end = end;
! 2465: r = 0;
! 2466: if (PEND) {
! 2467: return ONIGERR_EMPTY_GROUP_NAME;
! 2468: }
! 2469: else {
! 2470: PFETCH(c);
! 2471: first_code = c;
! 2472: if (c == '>')
! 2473: return ONIGERR_EMPTY_GROUP_NAME;
! 2474:
! 2475: if (!ONIGENC_IS_CODE_WORD(enc, c)) {
! 2476: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2477: }
! 2478: }
! 2479:
! 2480: while (!PEND) {
! 2481: name_end = p;
! 2482: PFETCH(c);
! 2483: if (c == '>' || c == ')' || c == '+' || c == '-') break;
! 2484:
! 2485: if (!ONIGENC_IS_CODE_WORD(enc, c)) {
! 2486: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2487: }
! 2488: }
! 2489:
! 2490: if (c != '>') {
! 2491: if (c == '+' || c == '-') {
! 2492: int num;
! 2493: int flag = (c == '-' ? -1 : 1);
! 2494:
! 2495: PFETCH(c);
! 2496: if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
! 2497: PUNFETCH;
! 2498: num = onig_scan_unsigned_number(&p, end, enc);
! 2499: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 2500: *level = (num * flag);
! 2501: exist_level = 1;
! 2502:
! 2503: PFETCH(c);
! 2504: if (c == '>')
! 2505: goto first_check;
! 2506: }
! 2507:
! 2508: err:
! 2509: r = ONIGERR_INVALID_GROUP_NAME;
! 2510: name_end = end;
! 2511: }
! 2512: else {
! 2513: first_check:
! 2514: if (ONIGENC_IS_CODE_ASCII(first_code) &&
! 2515: ONIGENC_IS_CODE_UPPER(enc, first_code))
! 2516: r = ONIGERR_INVALID_GROUP_NAME;
! 2517: }
! 2518:
! 2519: if (r == 0) {
! 2520: *rname_end = name_end;
! 2521: *src = p;
! 2522: return (exist_level ? 1 : 0);
! 2523: }
! 2524: else {
! 2525: onig_scan_env_set_error_string(env, r, *src, name_end);
! 2526: return r;
! 2527: }
! 2528: }
! 2529: #endif /* USE_BACKREF_AT_LEVEL */
! 2530:
! 2531: /*
! 2532: def: 0 -> define name (don't allow number name)
! 2533: 1 -> reference name (allow number name)
! 2534: */
! 2535: static int
! 2536: fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
! 2537: {
! 2538: int r, is_num;
! 2539: OnigCodePoint c = 0;
! 2540: OnigCodePoint first_code;
! 2541: OnigEncoding enc = env->enc;
! 2542: UChar *name_end;
! 2543: UChar *p = *src;
! 2544: PFETCH_READY;
! 2545:
! 2546: name_end = end;
! 2547: r = 0;
! 2548: is_num = 0;
! 2549: if (PEND) {
! 2550: return ONIGERR_EMPTY_GROUP_NAME;
! 2551: }
! 2552: else {
! 2553: PFETCH(c);
! 2554: first_code = c;
! 2555: if (c == '>')
! 2556: return ONIGERR_EMPTY_GROUP_NAME;
! 2557:
! 2558: if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
! 2559: if (ref == 1)
! 2560: is_num = 1;
! 2561: else {
! 2562: r = ONIGERR_INVALID_GROUP_NAME;
! 2563: }
! 2564: }
! 2565: else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
! 2566: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2567: }
! 2568: }
! 2569:
! 2570: while (!PEND) {
! 2571: name_end = p;
! 2572: PFETCH(c);
! 2573: if (c == '>' || c == ')') break;
! 2574:
! 2575: if (is_num == 1) {
! 2576: if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
! 2577: if (!ONIGENC_IS_CODE_WORD(enc, c))
! 2578: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2579: else
! 2580: r = ONIGERR_INVALID_GROUP_NAME;
! 2581: }
! 2582: }
! 2583: else {
! 2584: if (!ONIGENC_IS_CODE_WORD(enc, c)) {
! 2585: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2586: }
! 2587: }
! 2588: }
! 2589:
! 2590: if (c != '>') {
! 2591: r = ONIGERR_INVALID_GROUP_NAME;
! 2592: name_end = end;
! 2593: }
! 2594: else {
! 2595: if (ONIGENC_IS_CODE_ASCII(first_code) &&
! 2596: ONIGENC_IS_CODE_UPPER(enc, first_code))
! 2597: r = ONIGERR_INVALID_GROUP_NAME;
! 2598: }
! 2599:
! 2600: if (r == 0) {
! 2601: *rname_end = name_end;
! 2602: *src = p;
! 2603: return 0;
! 2604: }
! 2605: else {
! 2606: onig_scan_env_set_error_string(env, r, *src, name_end);
! 2607: return r;
! 2608: }
! 2609: }
! 2610: #else
! 2611: static int
! 2612: fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
! 2613: {
! 2614: int r, len;
! 2615: OnigCodePoint c = 0;
! 2616: UChar *name_end;
! 2617: OnigEncoding enc = env->enc;
! 2618: UChar *p = *src;
! 2619: PFETCH_READY;
! 2620:
! 2621: r = 0;
! 2622: while (!PEND) {
! 2623: name_end = p;
! 2624: if (enc_len(enc, p) > 1)
! 2625: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2626:
! 2627: PFETCH(c);
! 2628: if (c == '>' || c == ')') break;
! 2629: if (! ONIGENC_IS_CODE_DIGIT(enc, c))
! 2630: r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
! 2631: }
! 2632: if (c != '>') {
! 2633: r = ONIGERR_INVALID_GROUP_NAME;
! 2634: name_end = end;
! 2635: }
! 2636:
! 2637: if (r == 0) {
! 2638: *rname_end = name_end;
! 2639: *src = p;
! 2640: return 0;
! 2641: }
! 2642: else {
! 2643: err:
! 2644: onig_scan_env_set_error_string(env, r, *src, name_end);
! 2645: return r;
! 2646: }
! 2647: }
! 2648: #endif
! 2649:
! 2650: static void
! 2651: CC_ESC_WARN(ScanEnv* env, UChar *c)
! 2652: {
! 2653: if (onig_warn == onig_null_warn) return ;
! 2654:
! 2655: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
! 2656: IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
! 2657: UChar buf[WARN_BUFSIZE];
! 2658: onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
! 2659: env->pattern, env->pattern_end,
! 2660: (UChar* )"character class has '%s' without escape", c);
! 2661: (*onig_warn)((char* )buf);
! 2662: }
! 2663: }
! 2664:
! 2665: static void
! 2666: CCEND_ESC_WARN(ScanEnv* env, UChar* c)
! 2667: {
! 2668: if (onig_warn == onig_null_warn) return ;
! 2669:
! 2670: if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
! 2671: UChar buf[WARN_BUFSIZE];
! 2672: onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
! 2673: (env)->pattern, (env)->pattern_end,
! 2674: (UChar* )"regular expression has '%s' without escape", c);
! 2675: (*onig_warn)((char* )buf);
! 2676: }
! 2677: }
! 2678:
! 2679: static UChar*
! 2680: find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
! 2681: UChar **next, OnigEncoding enc)
! 2682: {
! 2683: int i;
! 2684: OnigCodePoint x;
! 2685: UChar *q;
! 2686: UChar *p = from;
! 2687:
! 2688: while (p < to) {
! 2689: x = ONIGENC_MBC_TO_CODE(enc, p, to);
! 2690: q = p + enc_len(enc, p);
! 2691: if (x == s[0]) {
! 2692: for (i = 1; i < n && q < to; i++) {
! 2693: x = ONIGENC_MBC_TO_CODE(enc, q, to);
! 2694: if (x != s[i]) break;
! 2695: q += enc_len(enc, q);
! 2696: }
! 2697: if (i >= n) {
! 2698: if (IS_NOT_NULL(next))
! 2699: *next = q;
! 2700: return p;
! 2701: }
! 2702: }
! 2703: p = q;
! 2704: }
! 2705: return NULL_UCHARP;
! 2706: }
! 2707:
! 2708: static int
! 2709: str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
! 2710: OnigCodePoint bad, OnigEncoding enc)
! 2711: {
! 2712: int i, in_esc;
! 2713: OnigCodePoint x;
! 2714: UChar *q;
! 2715: UChar *p = from;
! 2716:
! 2717: in_esc = 0;
! 2718: while (p < to) {
! 2719: if (in_esc) {
! 2720: in_esc = 0;
! 2721: p += enc_len(enc, p);
! 2722: }
! 2723: else {
! 2724: x = ONIGENC_MBC_TO_CODE(enc, p, to);
! 2725: q = p + enc_len(enc, p);
! 2726: if (x == s[0]) {
! 2727: for (i = 1; i < n && q < to; i++) {
! 2728: x = ONIGENC_MBC_TO_CODE(enc, q, to);
! 2729: if (x != s[i]) break;
! 2730: q += enc_len(enc, q);
! 2731: }
! 2732: if (i >= n) return 1;
! 2733: p += enc_len(enc, p);
! 2734: }
! 2735: else {
! 2736: x = ONIGENC_MBC_TO_CODE(enc, p, to);
! 2737: if (x == bad) return 0;
! 2738: else if (x == MC_ESC(enc)) in_esc = 1;
! 2739: p = q;
! 2740: }
! 2741: }
! 2742: }
! 2743: return 0;
! 2744: }
! 2745:
! 2746: static int
! 2747: fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
! 2748: {
! 2749: int num;
! 2750: OnigCodePoint c, c2;
! 2751: OnigSyntaxType* syn = env->syntax;
! 2752: OnigEncoding enc = env->enc;
! 2753: UChar* prev;
! 2754: UChar* p = *src;
! 2755: PFETCH_READY;
! 2756:
! 2757: if (PEND) {
! 2758: tok->type = TK_EOT;
! 2759: return tok->type;
! 2760: }
! 2761:
! 2762: PFETCH(c);
! 2763: tok->type = TK_CHAR;
! 2764: tok->base = 0;
! 2765: tok->u.c = c;
! 2766: tok->escaped = 0;
! 2767:
! 2768: if (c == ']') {
! 2769: tok->type = TK_CC_CLOSE;
! 2770: }
! 2771: else if (c == '-') {
! 2772: tok->type = TK_CC_RANGE;
! 2773: }
! 2774: else if (c == MC_ESC(enc)) {
! 2775: if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
! 2776: goto end;
! 2777:
! 2778: if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
! 2779:
! 2780: PFETCH(c);
! 2781: tok->escaped = 1;
! 2782: tok->u.c = c;
! 2783: switch (c) {
! 2784: case 'w':
! 2785: tok->type = TK_CHAR_TYPE;
! 2786: tok->u.subtype = CTYPE_WORD;
! 2787: break;
! 2788: case 'W':
! 2789: tok->type = TK_CHAR_TYPE;
! 2790: tok->u.subtype = CTYPE_NOT_WORD;
! 2791: break;
! 2792: case 'd':
! 2793: tok->type = TK_CHAR_TYPE;
! 2794: tok->u.subtype = CTYPE_DIGIT;
! 2795: break;
! 2796: case 'D':
! 2797: tok->type = TK_CHAR_TYPE;
! 2798: tok->u.subtype = CTYPE_NOT_DIGIT;
! 2799: break;
! 2800: case 's':
! 2801: tok->type = TK_CHAR_TYPE;
! 2802: tok->u.subtype = CTYPE_WHITE_SPACE;
! 2803: break;
! 2804: case 'S':
! 2805: tok->type = TK_CHAR_TYPE;
! 2806: tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
! 2807: break;
! 2808: case 'h':
! 2809: if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
! 2810: tok->type = TK_CHAR_TYPE;
! 2811: tok->u.subtype = CTYPE_XDIGIT;
! 2812: break;
! 2813: case 'H':
! 2814: if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
! 2815: tok->type = TK_CHAR_TYPE;
! 2816: tok->u.subtype = CTYPE_NOT_XDIGIT;
! 2817: break;
! 2818:
! 2819: case 'p':
! 2820: case 'P':
! 2821: c2 = PPEEK;
! 2822: if (c2 == '{' &&
! 2823: IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
! 2824: PINC;
! 2825: tok->type = TK_CHAR_PROPERTY;
! 2826: tok->u.prop.not = (c == 'P' ? 1 : 0);
! 2827:
! 2828: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
! 2829: PFETCH(c2);
! 2830: if (c2 == '^') {
! 2831: tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
! 2832: }
! 2833: else
! 2834: PUNFETCH;
! 2835: }
! 2836: }
! 2837: break;
! 2838:
! 2839: case 'x':
! 2840: if (PEND) break;
! 2841:
! 2842: prev = p;
! 2843: if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
! 2844: PINC;
! 2845: num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
! 2846: if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
! 2847: if (!PEND) {
! 2848: c2 = PPEEK;
! 2849: if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
! 2850: return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
! 2851: }
! 2852:
! 2853: if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) {
! 2854: PINC;
! 2855: tok->type = TK_CODE_POINT;
! 2856: tok->base = 16;
! 2857: tok->u.code = (OnigCodePoint )num;
! 2858: }
! 2859: else {
! 2860: /* can't read nothing or invalid format */
! 2861: p = prev;
! 2862: }
! 2863: }
! 2864: else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
! 2865: num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
! 2866: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 2867: if (p == prev) { /* can't read nothing. */
! 2868: num = 0; /* but, it's not error */
! 2869: }
! 2870: tok->type = TK_RAW_BYTE;
! 2871: tok->base = 16;
! 2872: tok->u.c = num;
! 2873: }
! 2874: break;
! 2875:
! 2876: case 'u':
! 2877: if (PEND) break;
! 2878:
! 2879: prev = p;
! 2880: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
! 2881: num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
! 2882: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 2883: if (p == prev) { /* can't read nothing. */
! 2884: num = 0; /* but, it's not error */
! 2885: }
! 2886: tok->type = TK_CODE_POINT;
! 2887: tok->base = 16;
! 2888: tok->u.code = (OnigCodePoint )num;
! 2889: }
! 2890: break;
! 2891:
! 2892: case '0':
! 2893: case '1': case '2': case '3': case '4': case '5': case '6': case '7':
! 2894: if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
! 2895: PUNFETCH;
! 2896: prev = p;
! 2897: num = scan_unsigned_octal_number(&p, end, 3, enc);
! 2898: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 2899: if (p == prev) { /* can't read nothing. */
! 2900: num = 0; /* but, it's not error */
! 2901: }
! 2902: tok->type = TK_RAW_BYTE;
! 2903: tok->base = 8;
! 2904: tok->u.c = num;
! 2905: }
! 2906: break;
! 2907:
! 2908: default:
! 2909: PUNFETCH;
! 2910: num = fetch_escaped_value(&p, end, env);
! 2911: if (num < 0) return num;
! 2912: if (tok->u.c != num) {
! 2913: tok->u.code = (OnigCodePoint )num;
! 2914: tok->type = TK_CODE_POINT;
! 2915: }
! 2916: break;
! 2917: }
! 2918: }
! 2919: else if (c == '[') {
! 2920: if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
! 2921: OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
! 2922: tok->backp = p; /* point at '[' is readed */
! 2923: PINC;
! 2924: if (str_exist_check_with_esc(send, 2, p, end,
! 2925: (OnigCodePoint )']', enc)) {
! 2926: tok->type = TK_POSIX_BRACKET_OPEN;
! 2927: }
! 2928: else {
! 2929: PUNFETCH;
! 2930: goto cc_in_cc;
! 2931: }
! 2932: }
! 2933: else {
! 2934: cc_in_cc:
! 2935: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
! 2936: tok->type = TK_CC_CC_OPEN;
! 2937: }
! 2938: else {
! 2939: CC_ESC_WARN(env, (UChar* )"[");
! 2940: }
! 2941: }
! 2942: }
! 2943: else if (c == '&') {
! 2944: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
! 2945: !PEND && (PPEEK_IS('&'))) {
! 2946: PINC;
! 2947: tok->type = TK_CC_AND;
! 2948: }
! 2949: }
! 2950:
! 2951: end:
! 2952: *src = p;
! 2953: return tok->type;
! 2954: }
! 2955:
! 2956: static int
! 2957: fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
! 2958: {
! 2959: int r, num;
! 2960: OnigCodePoint c;
! 2961: OnigEncoding enc = env->enc;
! 2962: OnigSyntaxType* syn = env->syntax;
! 2963: UChar* prev;
! 2964: UChar* p = *src;
! 2965: PFETCH_READY;
! 2966:
! 2967: start:
! 2968: if (PEND) {
! 2969: tok->type = TK_EOT;
! 2970: return tok->type;
! 2971: }
! 2972:
! 2973: tok->type = TK_STRING;
! 2974: tok->base = 0;
! 2975: tok->backp = p;
! 2976:
! 2977: PFETCH(c);
! 2978: if (IS_MC_ESC_CODE(c, enc, syn)) {
! 2979: if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
! 2980:
! 2981: tok->backp = p;
! 2982: PFETCH(c);
! 2983:
! 2984: tok->u.c = c;
! 2985: tok->escaped = 1;
! 2986: switch (c) {
! 2987: case '*':
! 2988: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
! 2989: tok->type = TK_OP_REPEAT;
! 2990: tok->u.repeat.lower = 0;
! 2991: tok->u.repeat.upper = REPEAT_INFINITE;
! 2992: goto greedy_check;
! 2993: break;
! 2994:
! 2995: case '+':
! 2996: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
! 2997: tok->type = TK_OP_REPEAT;
! 2998: tok->u.repeat.lower = 1;
! 2999: tok->u.repeat.upper = REPEAT_INFINITE;
! 3000: goto greedy_check;
! 3001: break;
! 3002:
! 3003: case '?':
! 3004: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
! 3005: tok->type = TK_OP_REPEAT;
! 3006: tok->u.repeat.lower = 0;
! 3007: tok->u.repeat.upper = 1;
! 3008: greedy_check:
! 3009: if (!PEND && PPEEK_IS('?') &&
! 3010: IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
! 3011: PFETCH(c);
! 3012: tok->u.repeat.greedy = 0;
! 3013: tok->u.repeat.possessive = 0;
! 3014: }
! 3015: else {
! 3016: possessive_check:
! 3017: if (!PEND && PPEEK_IS('+') &&
! 3018: ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
! 3019: tok->type != TK_INTERVAL) ||
! 3020: (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
! 3021: tok->type == TK_INTERVAL))) {
! 3022: PFETCH(c);
! 3023: tok->u.repeat.greedy = 1;
! 3024: tok->u.repeat.possessive = 1;
! 3025: }
! 3026: else {
! 3027: tok->u.repeat.greedy = 1;
! 3028: tok->u.repeat.possessive = 0;
! 3029: }
! 3030: }
! 3031: break;
! 3032:
! 3033: case '{':
! 3034: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
! 3035: r = fetch_range_quantifier(&p, end, tok, env);
! 3036: if (r < 0) return r; /* error */
! 3037: if (r == 0) goto greedy_check;
! 3038: else if (r == 2) { /* {n} */
! 3039: if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
! 3040: goto possessive_check;
! 3041:
! 3042: goto greedy_check;
! 3043: }
! 3044: /* r == 1 : normal char */
! 3045: break;
! 3046:
! 3047: case '|':
! 3048: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
! 3049: tok->type = TK_ALT;
! 3050: break;
! 3051:
! 3052: case '(':
! 3053: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
! 3054: tok->type = TK_SUBEXP_OPEN;
! 3055: break;
! 3056:
! 3057: case ')':
! 3058: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
! 3059: tok->type = TK_SUBEXP_CLOSE;
! 3060: break;
! 3061:
! 3062: case 'w':
! 3063: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
! 3064: tok->type = TK_CHAR_TYPE;
! 3065: tok->u.subtype = CTYPE_WORD;
! 3066: break;
! 3067:
! 3068: case 'W':
! 3069: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
! 3070: tok->type = TK_CHAR_TYPE;
! 3071: tok->u.subtype = CTYPE_NOT_WORD;
! 3072: break;
! 3073:
! 3074: case 'b':
! 3075: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
! 3076: tok->type = TK_ANCHOR;
! 3077: tok->u.anchor = ANCHOR_WORD_BOUND;
! 3078: break;
! 3079:
! 3080: case 'B':
! 3081: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
! 3082: tok->type = TK_ANCHOR;
! 3083: tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
! 3084: break;
! 3085:
! 3086: #ifdef USE_WORD_BEGIN_END
! 3087: case '<':
! 3088: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
! 3089: tok->type = TK_ANCHOR;
! 3090: tok->u.anchor = ANCHOR_WORD_BEGIN;
! 3091: break;
! 3092:
! 3093: case '>':
! 3094: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
! 3095: tok->type = TK_ANCHOR;
! 3096: tok->u.anchor = ANCHOR_WORD_END;
! 3097: break;
! 3098: #endif
! 3099:
! 3100: case 's':
! 3101: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
! 3102: tok->type = TK_CHAR_TYPE;
! 3103: tok->u.subtype = CTYPE_WHITE_SPACE;
! 3104: break;
! 3105:
! 3106: case 'S':
! 3107: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
! 3108: tok->type = TK_CHAR_TYPE;
! 3109: tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
! 3110: break;
! 3111:
! 3112: case 'd':
! 3113: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
! 3114: tok->type = TK_CHAR_TYPE;
! 3115: tok->u.subtype = CTYPE_DIGIT;
! 3116: break;
! 3117:
! 3118: case 'D':
! 3119: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
! 3120: tok->type = TK_CHAR_TYPE;
! 3121: tok->u.subtype = CTYPE_NOT_DIGIT;
! 3122: break;
! 3123:
! 3124: case 'h':
! 3125: if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
! 3126: tok->type = TK_CHAR_TYPE;
! 3127: tok->u.subtype = CTYPE_XDIGIT;
! 3128: break;
! 3129:
! 3130: case 'H':
! 3131: if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
! 3132: tok->type = TK_CHAR_TYPE;
! 3133: tok->u.subtype = CTYPE_NOT_XDIGIT;
! 3134: break;
! 3135:
! 3136: case 'A':
! 3137: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
! 3138: begin_buf:
! 3139: tok->type = TK_ANCHOR;
! 3140: tok->u.subtype = ANCHOR_BEGIN_BUF;
! 3141: break;
! 3142:
! 3143: case 'Z':
! 3144: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
! 3145: tok->type = TK_ANCHOR;
! 3146: tok->u.subtype = ANCHOR_SEMI_END_BUF;
! 3147: break;
! 3148:
! 3149: case 'z':
! 3150: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
! 3151: end_buf:
! 3152: tok->type = TK_ANCHOR;
! 3153: tok->u.subtype = ANCHOR_END_BUF;
! 3154: break;
! 3155:
! 3156: case 'G':
! 3157: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
! 3158: tok->type = TK_ANCHOR;
! 3159: tok->u.subtype = ANCHOR_BEGIN_POSITION;
! 3160: break;
! 3161:
! 3162: case '`':
! 3163: if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
! 3164: goto begin_buf;
! 3165: break;
! 3166:
! 3167: case '\'':
! 3168: if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
! 3169: goto end_buf;
! 3170: break;
! 3171:
! 3172: case 'x':
! 3173: if (PEND) break;
! 3174:
! 3175: prev = p;
! 3176: if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
! 3177: PINC;
! 3178: num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
! 3179: if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
! 3180: if (!PEND) {
! 3181: if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
! 3182: return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
! 3183: }
! 3184:
! 3185: if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) {
! 3186: PINC;
! 3187: tok->type = TK_CODE_POINT;
! 3188: tok->u.code = (OnigCodePoint )num;
! 3189: }
! 3190: else {
! 3191: /* can't read nothing or invalid format */
! 3192: p = prev;
! 3193: }
! 3194: }
! 3195: else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
! 3196: num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
! 3197: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 3198: if (p == prev) { /* can't read nothing. */
! 3199: num = 0; /* but, it's not error */
! 3200: }
! 3201: tok->type = TK_RAW_BYTE;
! 3202: tok->base = 16;
! 3203: tok->u.c = num;
! 3204: }
! 3205: break;
! 3206:
! 3207: case 'u':
! 3208: if (PEND) break;
! 3209:
! 3210: prev = p;
! 3211: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
! 3212: num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
! 3213: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 3214: if (p == prev) { /* can't read nothing. */
! 3215: num = 0; /* but, it's not error */
! 3216: }
! 3217: tok->type = TK_CODE_POINT;
! 3218: tok->base = 16;
! 3219: tok->u.code = (OnigCodePoint )num;
! 3220: }
! 3221: break;
! 3222:
! 3223: case '1': case '2': case '3': case '4':
! 3224: case '5': case '6': case '7': case '8': case '9':
! 3225: PUNFETCH;
! 3226: prev = p;
! 3227: num = onig_scan_unsigned_number(&p, end, enc);
! 3228: if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
! 3229: goto skip_backref;
! 3230: }
! 3231:
! 3232: if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
! 3233: (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
! 3234: if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
! 3235: if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
! 3236: return ONIGERR_INVALID_BACKREF;
! 3237: }
! 3238:
! 3239: tok->type = TK_BACKREF;
! 3240: tok->u.backref.num = 1;
! 3241: tok->u.backref.ref1 = num;
! 3242: tok->u.backref.by_name = 0;
! 3243: #ifdef USE_BACKREF_AT_LEVEL
! 3244: tok->u.backref.exist_level = 0;
! 3245: #endif
! 3246: break;
! 3247: }
! 3248:
! 3249: skip_backref:
! 3250: if (c == '8' || c == '9') {
! 3251: /* normal char */
! 3252: p = prev; PINC;
! 3253: break;
! 3254: }
! 3255:
! 3256: p = prev;
! 3257: /* fall through */
! 3258: case '0':
! 3259: if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
! 3260: prev = p;
! 3261: num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
! 3262: if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
! 3263: if (p == prev) { /* can't read nothing. */
! 3264: num = 0; /* but, it's not error */
! 3265: }
! 3266: tok->type = TK_RAW_BYTE;
! 3267: tok->base = 8;
! 3268: tok->u.c = num;
! 3269: }
! 3270: else if (c != '0') {
! 3271: PINC;
! 3272: }
! 3273: break;
! 3274:
! 3275: #ifdef USE_NAMED_GROUP
! 3276: case 'k':
! 3277: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
! 3278: PFETCH(c);
! 3279: if (c == '<') {
! 3280: UChar* name_end;
! 3281: int* backs;
! 3282:
! 3283: prev = p;
! 3284:
! 3285: #ifdef USE_BACKREF_AT_LEVEL
! 3286: name_end = NULL_UCHARP; /* no need. escape gcc warning. */
! 3287: r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
! 3288: if (r == 1) tok->u.backref.exist_level = 1;
! 3289: else tok->u.backref.exist_level = 0;
! 3290: #else
! 3291: r = fetch_name(&p, end, &name_end, env, 1);
! 3292: #endif
! 3293: if (r < 0) return r;
! 3294:
! 3295: num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
! 3296: if (num <= 0) {
! 3297: onig_scan_env_set_error_string(env,
! 3298: ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
! 3299: return ONIGERR_UNDEFINED_NAME_REFERENCE;
! 3300: }
! 3301: if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
! 3302: int i;
! 3303: for (i = 0; i < num; i++) {
! 3304: if (backs[i] > env->num_mem ||
! 3305: IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
! 3306: return ONIGERR_INVALID_BACKREF;
! 3307: }
! 3308: }
! 3309:
! 3310: tok->type = TK_BACKREF;
! 3311: tok->u.backref.by_name = 1;
! 3312: if (num == 1) {
! 3313: tok->u.backref.num = 1;
! 3314: tok->u.backref.ref1 = backs[0];
! 3315: }
! 3316: else {
! 3317: tok->u.backref.num = num;
! 3318: tok->u.backref.refs = backs;
! 3319: }
! 3320: }
! 3321: else
! 3322: PUNFETCH;
! 3323: }
! 3324: break;
! 3325: #endif
! 3326:
! 3327: #ifdef USE_SUBEXP_CALL
! 3328: case 'g':
! 3329: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
! 3330: PFETCH(c);
! 3331: if (c == '<') {
! 3332: UChar* name_end;
! 3333:
! 3334: prev = p;
! 3335: r = fetch_name(&p, end, &name_end, env, 1);
! 3336: if (r < 0) return r;
! 3337:
! 3338: tok->type = TK_CALL;
! 3339: tok->u.call.name = prev;
! 3340: tok->u.call.name_end = name_end;
! 3341: }
! 3342: else
! 3343: PUNFETCH;
! 3344: }
! 3345: break;
! 3346: #endif
! 3347:
! 3348: case 'Q':
! 3349: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
! 3350: tok->type = TK_QUOTE_OPEN;
! 3351: }
! 3352: break;
! 3353:
! 3354: case 'p':
! 3355: case 'P':
! 3356: if (PPEEK_IS('{') &&
! 3357: IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
! 3358: PINC;
! 3359: tok->type = TK_CHAR_PROPERTY;
! 3360: tok->u.prop.not = (c == 'P' ? 1 : 0);
! 3361:
! 3362: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
! 3363: PFETCH(c);
! 3364: if (c == '^') {
! 3365: tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
! 3366: }
! 3367: else
! 3368: PUNFETCH;
! 3369: }
! 3370: }
! 3371: break;
! 3372:
! 3373: default:
! 3374: PUNFETCH;
! 3375: num = fetch_escaped_value(&p, end, env);
! 3376: if (num < 0) return num;
! 3377: /* set_raw: */
! 3378: if (tok->u.c != num) {
! 3379: tok->type = TK_CODE_POINT;
! 3380: tok->u.code = (OnigCodePoint )num;
! 3381: }
! 3382: else { /* string */
! 3383: p = tok->backp + enc_len(enc, tok->backp);
! 3384: }
! 3385: break;
! 3386: }
! 3387: }
! 3388: else {
! 3389: tok->u.c = c;
! 3390: tok->escaped = 0;
! 3391:
! 3392: #ifdef USE_VARIABLE_META_CHARS
! 3393: if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
! 3394: IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
! 3395: if (c == MC_ANYCHAR(enc))
! 3396: goto any_char;
! 3397: else if (c == MC_ANYTIME(enc))
! 3398: goto anytime;
! 3399: else if (c == MC_ZERO_OR_ONE_TIME(enc))
! 3400: goto zero_or_one_time;
! 3401: else if (c == MC_ONE_OR_MORE_TIME(enc))
! 3402: goto one_or_more_time;
! 3403: else if (c == MC_ANYCHAR_ANYTIME(enc)) {
! 3404: tok->type = TK_ANYCHAR_ANYTIME;
! 3405: goto out;
! 3406: }
! 3407: }
! 3408: #endif
! 3409:
! 3410: switch (c) {
! 3411: case '.':
! 3412: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
! 3413: #ifdef USE_VARIABLE_META_CHARS
! 3414: any_char:
! 3415: #endif
! 3416: tok->type = TK_ANYCHAR;
! 3417: break;
! 3418:
! 3419: case '*':
! 3420: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
! 3421: #ifdef USE_VARIABLE_META_CHARS
! 3422: anytime:
! 3423: #endif
! 3424: tok->type = TK_OP_REPEAT;
! 3425: tok->u.repeat.lower = 0;
! 3426: tok->u.repeat.upper = REPEAT_INFINITE;
! 3427: goto greedy_check;
! 3428: break;
! 3429:
! 3430: case '+':
! 3431: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
! 3432: #ifdef USE_VARIABLE_META_CHARS
! 3433: one_or_more_time:
! 3434: #endif
! 3435: tok->type = TK_OP_REPEAT;
! 3436: tok->u.repeat.lower = 1;
! 3437: tok->u.repeat.upper = REPEAT_INFINITE;
! 3438: goto greedy_check;
! 3439: break;
! 3440:
! 3441: case '?':
! 3442: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
! 3443: #ifdef USE_VARIABLE_META_CHARS
! 3444: zero_or_one_time:
! 3445: #endif
! 3446: tok->type = TK_OP_REPEAT;
! 3447: tok->u.repeat.lower = 0;
! 3448: tok->u.repeat.upper = 1;
! 3449: goto greedy_check;
! 3450: break;
! 3451:
! 3452: case '{':
! 3453: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
! 3454: r = fetch_range_quantifier(&p, end, tok, env);
! 3455: if (r < 0) return r; /* error */
! 3456: if (r == 0) goto greedy_check;
! 3457: else if (r == 2) { /* {n} */
! 3458: if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
! 3459: goto possessive_check;
! 3460:
! 3461: goto greedy_check;
! 3462: }
! 3463: /* r == 1 : normal char */
! 3464: break;
! 3465:
! 3466: case '|':
! 3467: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
! 3468: tok->type = TK_ALT;
! 3469: break;
! 3470:
! 3471: case '(':
! 3472: if (PPEEK_IS('?') &&
! 3473: IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
! 3474: PINC;
! 3475: if (PPEEK_IS('#')) {
! 3476: PFETCH(c);
! 3477: while (1) {
! 3478: if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
! 3479: PFETCH(c);
! 3480: if (c == MC_ESC(enc)) {
! 3481: if (!PEND) PFETCH(c);
! 3482: }
! 3483: else {
! 3484: if (c == ')') break;
! 3485: }
! 3486: }
! 3487: goto start;
! 3488: }
! 3489: PUNFETCH;
! 3490: }
! 3491:
! 3492: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
! 3493: tok->type = TK_SUBEXP_OPEN;
! 3494: break;
! 3495:
! 3496: case ')':
! 3497: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
! 3498: tok->type = TK_SUBEXP_CLOSE;
! 3499: break;
! 3500:
! 3501: case '^':
! 3502: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
! 3503: tok->type = TK_ANCHOR;
! 3504: tok->u.subtype = (IS_SINGLELINE(env->option)
! 3505: ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
! 3506: break;
! 3507:
! 3508: case '$':
! 3509: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
! 3510: tok->type = TK_ANCHOR;
! 3511: tok->u.subtype = (IS_SINGLELINE(env->option)
! 3512: ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
! 3513: break;
! 3514:
! 3515: case '[':
! 3516: if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
! 3517: tok->type = TK_CC_OPEN;
! 3518: break;
! 3519:
! 3520: case ']':
! 3521: if (*src > env->pattern) /* /].../ is allowed. */
! 3522: CCEND_ESC_WARN(env, (UChar* )"]");
! 3523: break;
! 3524:
! 3525: case '#':
! 3526: if (IS_EXTEND(env->option)) {
! 3527: while (!PEND) {
! 3528: PFETCH(c);
! 3529: if (ONIGENC_IS_CODE_NEWLINE(enc, c))
! 3530: break;
! 3531: }
! 3532: goto start;
! 3533: break;
! 3534: }
! 3535: break;
! 3536:
! 3537: case ' ': case '\t': case '\n': case '\r': case '\f':
! 3538: if (IS_EXTEND(env->option))
! 3539: goto start;
! 3540: break;
! 3541:
! 3542: default:
! 3543: /* string */
! 3544: break;
! 3545: }
! 3546: }
! 3547:
! 3548: #ifdef USE_VARIABLE_META_CHARS
! 3549: out:
! 3550: #endif
! 3551: *src = p;
! 3552: return tok->type;
! 3553: }
! 3554:
! 3555: static int
! 3556: add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
! 3557: const OnigCodePoint sbr[], const OnigCodePoint mbr[])
! 3558: {
! 3559: int i, r;
! 3560: OnigCodePoint j;
! 3561:
! 3562: int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
! 3563: int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
! 3564:
! 3565: if (not == 0) {
! 3566: for (i = 0; i < nsb; i++) {
! 3567: for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
! 3568: j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
! 3569: BITSET_SET_BIT(cc->bs, j);
! 3570: }
! 3571: }
! 3572:
! 3573: for (i = 0; i < nmb; i++) {
! 3574: r = add_code_range_to_buf(&(cc->mbuf),
! 3575: ONIGENC_CODE_RANGE_FROM(mbr, i),
! 3576: ONIGENC_CODE_RANGE_TO(mbr, i));
! 3577: if (r != 0) return r;
! 3578: }
! 3579: }
! 3580: else {
! 3581: OnigCodePoint prev = 0;
! 3582:
! 3583: if (ONIGENC_MBC_MINLEN(enc) == 1) {
! 3584: for (i = 0; i < nsb; i++) {
! 3585: for (j = prev;
! 3586: j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
! 3587: BITSET_SET_BIT(cc->bs, j);
! 3588: }
! 3589: prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
! 3590: }
! 3591: if (prev < 0x7f) {
! 3592: for (j = prev; j < 0x7f; j++) {
! 3593: BITSET_SET_BIT(cc->bs, j);
! 3594: }
! 3595: }
! 3596:
! 3597: prev = 0x80;
! 3598: }
! 3599:
! 3600: for (i = 0; i < nmb; i++) {
! 3601: if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
! 3602: r = add_code_range_to_buf(&(cc->mbuf), prev,
! 3603: ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
! 3604: if (r != 0) return r;
! 3605: }
! 3606: prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
! 3607: }
! 3608: if (prev < 0x7fffffff) {
! 3609: r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
! 3610: if (r != 0) return r;
! 3611: }
! 3612: }
! 3613:
! 3614: return 0;
! 3615: }
! 3616:
! 3617: static int
! 3618: add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
! 3619: {
! 3620: int c, r;
! 3621: const OnigCodePoint *sbr, *mbr;
! 3622: OnigEncoding enc = env->enc;
! 3623:
! 3624: r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
! 3625: if (r == 0) {
! 3626: return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
! 3627: }
! 3628: else if (r != ONIG_NO_SUPPORT_CONFIG) {
! 3629: return r;
! 3630: }
! 3631:
! 3632: r = 0;
! 3633: switch (ctype) {
! 3634: case ONIGENC_CTYPE_ALPHA:
! 3635: case ONIGENC_CTYPE_BLANK:
! 3636: case ONIGENC_CTYPE_CNTRL:
! 3637: case ONIGENC_CTYPE_DIGIT:
! 3638: case ONIGENC_CTYPE_LOWER:
! 3639: case ONIGENC_CTYPE_PUNCT:
! 3640: case ONIGENC_CTYPE_SPACE:
! 3641: case ONIGENC_CTYPE_UPPER:
! 3642: case ONIGENC_CTYPE_XDIGIT:
! 3643: case ONIGENC_CTYPE_ASCII:
! 3644: case ONIGENC_CTYPE_ALNUM:
! 3645: if (not != 0) {
! 3646: for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
! 3647: if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
! 3648: BITSET_SET_BIT(cc->bs, c);
! 3649: }
! 3650: ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
! 3651: }
! 3652: else {
! 3653: for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
! 3654: if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
! 3655: BITSET_SET_BIT(cc->bs, c);
! 3656: }
! 3657: }
! 3658: break;
! 3659:
! 3660: case ONIGENC_CTYPE_GRAPH:
! 3661: case ONIGENC_CTYPE_PRINT:
! 3662: if (not != 0) {
! 3663: for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
! 3664: if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
! 3665: BITSET_SET_BIT(cc->bs, c);
! 3666: }
! 3667: }
! 3668: else {
! 3669: for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
! 3670: if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
! 3671: BITSET_SET_BIT(cc->bs, c);
! 3672: }
! 3673: ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
! 3674: }
! 3675: break;
! 3676:
! 3677: case ONIGENC_CTYPE_WORD:
! 3678: if (not == 0) {
! 3679: for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
! 3680: if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
! 3681: }
! 3682: ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
! 3683: }
! 3684: else {
! 3685: for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
! 3686: if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
! 3687: && ! ONIGENC_IS_CODE_WORD(enc, c))
! 3688: BITSET_SET_BIT(cc->bs, c);
! 3689: }
! 3690: }
! 3691: break;
! 3692:
! 3693: default:
! 3694: return ONIGERR_PARSER_BUG;
! 3695: break;
! 3696: }
! 3697:
! 3698: return r;
! 3699: }
! 3700:
! 3701: static int
! 3702: parse_ctype_to_enc_ctype(int pctype, int* not)
! 3703: {
! 3704: int ctype;
! 3705:
! 3706: switch (pctype) {
! 3707: case CTYPE_WORD:
! 3708: ctype = ONIGENC_CTYPE_WORD;
! 3709: *not = 0;
! 3710: break;
! 3711: case CTYPE_NOT_WORD:
! 3712: ctype = ONIGENC_CTYPE_WORD;
! 3713: *not = 1;
! 3714: break;
! 3715: case CTYPE_WHITE_SPACE:
! 3716: ctype = ONIGENC_CTYPE_SPACE;
! 3717: *not = 0;
! 3718: break;
! 3719: case CTYPE_NOT_WHITE_SPACE:
! 3720: ctype = ONIGENC_CTYPE_SPACE;
! 3721: *not = 1;
! 3722: break;
! 3723: case CTYPE_DIGIT:
! 3724: ctype = ONIGENC_CTYPE_DIGIT;
! 3725: *not = 0;
! 3726: break;
! 3727: case CTYPE_NOT_DIGIT:
! 3728: ctype = ONIGENC_CTYPE_DIGIT;
! 3729: *not = 1;
! 3730: break;
! 3731: case CTYPE_XDIGIT:
! 3732: ctype = ONIGENC_CTYPE_XDIGIT;
! 3733: *not = 0;
! 3734: break;
! 3735: case CTYPE_NOT_XDIGIT:
! 3736: ctype = ONIGENC_CTYPE_XDIGIT;
! 3737: *not = 1;
! 3738: break;
! 3739: default:
! 3740: return ONIGERR_PARSER_BUG;
! 3741: break;
! 3742: }
! 3743: return ctype;
! 3744: }
! 3745:
! 3746: typedef struct {
! 3747: UChar *name;
! 3748: int ctype;
! 3749: short int len;
! 3750: } PosixBracketEntryType;
! 3751:
! 3752: static int
! 3753: parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
! 3754: {
! 3755: #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
! 3756: #define POSIX_BRACKET_NAME_MAX_LEN 6
! 3757:
! 3758: static PosixBracketEntryType PBS[] = {
! 3759: { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
! 3760: { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
! 3761: { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
! 3762: { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
! 3763: { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
! 3764: { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
! 3765: { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
! 3766: { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
! 3767: { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
! 3768: { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
! 3769: { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
! 3770: { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
! 3771: { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
! 3772: { (UChar* )NULL, -1, 0 }
! 3773: };
! 3774:
! 3775: PosixBracketEntryType *pb;
! 3776: int not, i, r;
! 3777: OnigCodePoint c;
! 3778: OnigEncoding enc = env->enc;
! 3779: UChar *p = *src;
! 3780: PFETCH_READY;
! 3781:
! 3782: if (PPEEK_IS('^')) {
! 3783: PINC;
! 3784: not = 1;
! 3785: }
! 3786: else
! 3787: not = 0;
! 3788:
! 3789: if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2)
! 3790: goto not_posix_bracket;
! 3791:
! 3792: for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
! 3793: if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
! 3794: p = (UChar* )onigenc_step(enc, p, end, pb->len);
! 3795: if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
! 3796: return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
! 3797:
! 3798: r = add_ctype_to_cc(cc, pb->ctype, not, env);
! 3799: if (r != 0) return r;
! 3800:
! 3801: PINC; PINC;
! 3802: *src = p;
! 3803: return 0;
! 3804: }
! 3805: }
! 3806:
! 3807: not_posix_bracket:
! 3808: c = 0;
! 3809: i = 0;
! 3810: while (!PEND && ((c = PPEEK) != ':') && c != ']') {
! 3811: PINC;
! 3812: if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
! 3813: }
! 3814: if (c == ':' && ! PEND) {
! 3815: PINC;
! 3816: if (! PEND) {
! 3817: PFETCH(c);
! 3818: if (c == ']')
! 3819: return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
! 3820: }
! 3821: }
! 3822:
! 3823: return 1; /* 1: is not POSIX bracket, but no error. */
! 3824: }
! 3825:
! 3826: static int
! 3827: property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
! 3828: {
! 3829: static PosixBracketEntryType PBS[] = {
! 3830: { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
! 3831: { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
! 3832: { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
! 3833: { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
! 3834: { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
! 3835: { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
! 3836: { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
! 3837: { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
! 3838: { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
! 3839: { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
! 3840: { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
! 3841: { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
! 3842: { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
! 3843: { (UChar* )NULL, -1, 0 }
! 3844: };
! 3845:
! 3846: PosixBracketEntryType *pb;
! 3847: int len;
! 3848:
! 3849: len = onigenc_strlen(enc, p, end);
! 3850: for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
! 3851: if (len == pb->len &&
! 3852: onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
! 3853: return pb->ctype;
! 3854: }
! 3855:
! 3856: return -1;
! 3857: }
! 3858:
! 3859: static int
! 3860: fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
! 3861: {
! 3862: int ctype;
! 3863: OnigCodePoint c;
! 3864: OnigEncoding enc = env->enc;
! 3865: UChar *prev, *start, *p = *src;
! 3866: PFETCH_READY;
! 3867:
! 3868: /* 'IsXXXX' => 'XXXX' */
! 3869: if (!PEND &&
! 3870: IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
! 3871: c = PPEEK;
! 3872: if (c == 'I') {
! 3873: PINC;
! 3874: if (! PEND) {
! 3875: c = PPEEK;
! 3876: if (c == 's')
! 3877: PINC;
! 3878: else
! 3879: PUNFETCH;
! 3880: }
! 3881: }
! 3882: }
! 3883:
! 3884: start = prev = p;
! 3885:
! 3886: while (!PEND) {
! 3887: prev = p;
! 3888: PFETCH(c);
! 3889: if (c == '}') {
! 3890: ctype = property_name_to_ctype(start, prev, enc);
! 3891: if (ctype < 0) break;
! 3892:
! 3893: *src = p;
! 3894: return ctype;
! 3895: }
! 3896: else if (c == '(' || c == ')' || c == '{' || c == '|')
! 3897: break;
! 3898: }
! 3899:
! 3900: onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
! 3901: *src, prev);
! 3902: return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
! 3903: }
! 3904:
! 3905: static int
! 3906: parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
! 3907: ScanEnv* env)
! 3908: {
! 3909: int r, ctype;
! 3910: CClassNode* cc;
! 3911:
! 3912: ctype = fetch_char_property_to_ctype(src, end, env);
! 3913: if (ctype < 0) return ctype;
! 3914:
! 3915: *np = node_new_cclass();
! 3916: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 3917: cc = &(NCCLASS(*np));
! 3918: r = add_ctype_to_cc(cc, ctype, 0, env);
! 3919: if (r != 0) return r;
! 3920: if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
! 3921:
! 3922: return 0;
! 3923: }
! 3924:
! 3925:
/* Parser state while walking the members of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a single value is held back, not yet stored  */
  CCS_RANGE    = 1,  /* a range operator follows the held-back value */
  CCS_COMPLETE = 2,  /* a range has just been completed              */
  CCS_START    = 3   /* nothing read yet (also set again after "&&") */
};

/* Kind of the value that is currently held back. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* stored through the class bitset            */
  CCV_CODE_POINT = 1,  /* stored through add_code_range()            */
  CCV_CLASS      = 2   /* a whole type/class; nothing left to store  */
};
! 3938:
! 3939: static int
! 3940: next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
! 3941: enum CCSTATE* state, ScanEnv* env)
! 3942: {
! 3943: int r;
! 3944:
! 3945: if (*state == CCS_RANGE)
! 3946: return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
! 3947:
! 3948: if (*state == CCS_VALUE && *type != CCV_CLASS) {
! 3949: if (*type == CCV_SB)
! 3950: BITSET_SET_BIT(cc->bs, (int )(*vs));
! 3951: else if (*type == CCV_CODE_POINT) {
! 3952: r = add_code_range(&(cc->mbuf), env, *vs, *vs);
! 3953: if (r < 0) return r;
! 3954: }
! 3955: }
! 3956:
! 3957: *state = CCS_VALUE;
! 3958: *type = CCV_CLASS;
! 3959: return 0;
! 3960: }
! 3961:
! 3962: static int
! 3963: next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
! 3964: int* vs_israw, int v_israw,
! 3965: enum CCVALTYPE intype, enum CCVALTYPE* type,
! 3966: enum CCSTATE* state, ScanEnv* env)
! 3967: {
! 3968: int r;
! 3969:
! 3970: switch (*state) {
! 3971: case CCS_VALUE:
! 3972: if (*type == CCV_SB)
! 3973: BITSET_SET_BIT(cc->bs, (int )(*vs));
! 3974: else if (*type == CCV_CODE_POINT) {
! 3975: r = add_code_range(&(cc->mbuf), env, *vs, *vs);
! 3976: if (r < 0) return r;
! 3977: }
! 3978: break;
! 3979:
! 3980: case CCS_RANGE:
! 3981: if (intype == *type) {
! 3982: if (intype == CCV_SB) {
! 3983: if (*vs > 0xff || v > 0xff)
! 3984: return ONIGERR_INVALID_WIDE_CHAR_VALUE;
! 3985:
! 3986: if (*vs > v) {
! 3987: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
! 3988: goto ccs_range_end;
! 3989: else
! 3990: return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
! 3991: }
! 3992: bitset_set_range(cc->bs, (int )*vs, (int )v);
! 3993: }
! 3994: else {
! 3995: r = add_code_range(&(cc->mbuf), env, *vs, v);
! 3996: if (r < 0) return r;
! 3997: }
! 3998: }
! 3999: else {
! 4000: #if 0
! 4001: if (intype == CCV_CODE_POINT && *type == CCV_SB) {
! 4002: #endif
! 4003: if (*vs > v) {
! 4004: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
! 4005: goto ccs_range_end;
! 4006: else
! 4007: return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
! 4008: }
! 4009: bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
! 4010: r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
! 4011: if (r < 0) return r;
! 4012: #if 0
! 4013: }
! 4014: else
! 4015: return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
! 4016: #endif
! 4017: }
! 4018: ccs_range_end:
! 4019: *state = CCS_COMPLETE;
! 4020: break;
! 4021:
! 4022: case CCS_COMPLETE:
! 4023: case CCS_START:
! 4024: *state = CCS_VALUE;
! 4025: break;
! 4026:
! 4027: default:
! 4028: break;
! 4029: }
! 4030:
! 4031: *vs_israw = v_israw;
! 4032: *vs = v;
! 4033: *type = intype;
! 4034: return 0;
! 4035: }
! 4036:
! 4037: static int
! 4038: code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
! 4039: OnigEncoding enc)
! 4040: {
! 4041: int in_esc;
! 4042: OnigCodePoint code;
! 4043: UChar* p = from;
! 4044: PFETCH_READY;
! 4045:
! 4046: in_esc = 0;
! 4047: while (! PEND) {
! 4048: if (ignore_escaped && in_esc) {
! 4049: in_esc = 0;
! 4050: }
! 4051: else {
! 4052: PFETCH(code);
! 4053: if (code == c) return 1;
! 4054: if (code == MC_ESC(enc)) in_esc = 1;
! 4055: }
! 4056: }
! 4057: return 0;
! 4058: }
! 4059:
! 4060: static int
! 4061: parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
! 4062: ScanEnv* env)
! 4063: {
! 4064: int r, neg, len, fetched, and_start;
! 4065: OnigCodePoint v, vs;
! 4066: UChar *p;
! 4067: Node* node;
! 4068: CClassNode *cc, *prev_cc;
! 4069: CClassNode work_cc;
! 4070:
! 4071: enum CCSTATE state;
! 4072: enum CCVALTYPE val_type, in_type;
! 4073: int val_israw, in_israw;
! 4074:
! 4075: prev_cc = (CClassNode* )NULL;
! 4076: *np = NULL_NODE;
! 4077: r = fetch_token_in_cc(tok, src, end, env);
! 4078: if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
! 4079: neg = 1;
! 4080: r = fetch_token_in_cc(tok, src, end, env);
! 4081: }
! 4082: else {
! 4083: neg = 0;
! 4084: }
! 4085:
! 4086: if (r < 0) return r;
! 4087: if (r == TK_CC_CLOSE) {
! 4088: if (! code_exist_check((OnigCodePoint )']',
! 4089: *src, env->pattern_end, 1, env->enc))
! 4090: return ONIGERR_EMPTY_CHAR_CLASS;
! 4091:
! 4092: CC_ESC_WARN(env, (UChar* )"]");
! 4093: r = tok->type = TK_CHAR; /* allow []...] */
! 4094: }
! 4095:
! 4096: *np = node = node_new_cclass();
! 4097: CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
! 4098: cc = &(NCCLASS(node));
! 4099:
! 4100: and_start = 0;
! 4101: state = CCS_START;
! 4102: p = *src;
! 4103: while (r != TK_CC_CLOSE) {
! 4104: fetched = 0;
! 4105: switch (r) {
! 4106: case TK_CHAR:
! 4107: len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
! 4108: if (len > 1) {
! 4109: in_type = CCV_CODE_POINT;
! 4110: }
! 4111: else {
! 4112: sb_char:
! 4113: in_type = CCV_SB;
! 4114: }
! 4115: v = (OnigCodePoint )tok->u.c;
! 4116: in_israw = 0;
! 4117: goto val_entry2;
! 4118: break;
! 4119:
! 4120: case TK_RAW_BYTE:
! 4121: /* tok->base != 0 : octal or hexadec. */
! 4122: if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
! 4123: UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
! 4124: UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
! 4125: UChar* psave = p;
! 4126: int i, base = tok->base;
! 4127:
! 4128: buf[0] = tok->u.c;
! 4129: for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
! 4130: r = fetch_token_in_cc(tok, &p, end, env);
! 4131: if (r < 0) goto err;
! 4132: if (r != TK_RAW_BYTE || tok->base != base) {
! 4133: fetched = 1;
! 4134: break;
! 4135: }
! 4136: buf[i] = tok->u.c;
! 4137: }
! 4138:
! 4139: if (i < ONIGENC_MBC_MINLEN(env->enc)) {
! 4140: r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
! 4141: goto err;
! 4142: }
! 4143:
! 4144: len = enc_len(env->enc, buf);
! 4145: if (i < len) {
! 4146: r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
! 4147: goto err;
! 4148: }
! 4149: else if (i > len) { /* fetch back */
! 4150: p = psave;
! 4151: for (i = 1; i < len; i++) {
! 4152: r = fetch_token_in_cc(tok, &p, end, env);
! 4153: }
! 4154: fetched = 0;
! 4155: }
! 4156:
! 4157: if (i == 1) {
! 4158: v = (OnigCodePoint )buf[0];
! 4159: goto raw_single;
! 4160: }
! 4161: else {
! 4162: v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
! 4163: in_type = CCV_CODE_POINT;
! 4164: }
! 4165: }
! 4166: else {
! 4167: v = (OnigCodePoint )tok->u.c;
! 4168: raw_single:
! 4169: in_type = CCV_SB;
! 4170: }
! 4171: in_israw = 1;
! 4172: goto val_entry2;
! 4173: break;
! 4174:
! 4175: case TK_CODE_POINT:
! 4176: v = tok->u.code;
! 4177: in_israw = 1;
! 4178: val_entry:
! 4179: len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
! 4180: if (len < 0) {
! 4181: r = len;
! 4182: goto err;
! 4183: }
! 4184: in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
! 4185: val_entry2:
! 4186: r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
! 4187: &state, env);
! 4188: if (r != 0) goto err;
! 4189: break;
! 4190:
! 4191: case TK_POSIX_BRACKET_OPEN:
! 4192: r = parse_posix_bracket(cc, &p, end, env);
! 4193: if (r < 0) goto err;
! 4194: if (r == 1) { /* is not POSIX bracket */
! 4195: CC_ESC_WARN(env, (UChar* )"[");
! 4196: p = tok->backp;
! 4197: v = (OnigCodePoint )tok->u.c;
! 4198: in_israw = 0;
! 4199: goto val_entry;
! 4200: }
! 4201: goto next_class;
! 4202: break;
! 4203:
! 4204: case TK_CHAR_TYPE:
! 4205: {
! 4206: int ctype, not;
! 4207: ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
! 4208: r = add_ctype_to_cc(cc, ctype, not, env);
! 4209: if (r != 0) return r;
! 4210: }
! 4211:
! 4212: next_class:
! 4213: r = next_state_class(cc, &vs, &val_type, &state, env);
! 4214: if (r != 0) goto err;
! 4215: break;
! 4216:
! 4217: case TK_CHAR_PROPERTY:
! 4218: {
! 4219: int ctype;
! 4220:
! 4221: ctype = fetch_char_property_to_ctype(&p, end, env);
! 4222: if (ctype < 0) return ctype;
! 4223: r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
! 4224: if (r != 0) return r;
! 4225: goto next_class;
! 4226: }
! 4227: break;
! 4228:
! 4229: case TK_CC_RANGE:
! 4230: if (state == CCS_VALUE) {
! 4231: r = fetch_token_in_cc(tok, &p, end, env);
! 4232: if (r < 0) goto err;
! 4233: fetched = 1;
! 4234: if (r == TK_CC_CLOSE) { /* allow [x-] */
! 4235: range_end_val:
! 4236: v = (OnigCodePoint )'-';
! 4237: in_israw = 0;
! 4238: goto val_entry;
! 4239: }
! 4240: else if (r == TK_CC_AND) {
! 4241: CC_ESC_WARN(env, (UChar* )"-");
! 4242: goto range_end_val;
! 4243: }
! 4244: state = CCS_RANGE;
! 4245: }
! 4246: else if (state == CCS_START) {
! 4247: /* [-xa] is allowed */
! 4248: v = (OnigCodePoint )tok->u.c;
! 4249: in_israw = 0;
! 4250:
! 4251: r = fetch_token_in_cc(tok, &p, end, env);
! 4252: if (r < 0) goto err;
! 4253: fetched = 1;
! 4254: /* [--x] or [a&&-x] is warned. */
! 4255: if (r == TK_CC_RANGE || and_start != 0)
! 4256: CC_ESC_WARN(env, (UChar* )"-");
! 4257:
! 4258: goto val_entry;
! 4259: }
! 4260: else if (state == CCS_RANGE) {
! 4261: CC_ESC_WARN(env, (UChar* )"-");
! 4262: goto sb_char; /* [!--x] is allowed */
! 4263: }
! 4264: else { /* CCS_COMPLETE */
! 4265: r = fetch_token_in_cc(tok, &p, end, env);
! 4266: if (r < 0) goto err;
! 4267: fetched = 1;
! 4268: if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
! 4269: else if (r == TK_CC_AND) {
! 4270: CC_ESC_WARN(env, (UChar* )"-");
! 4271: goto range_end_val;
! 4272: }
! 4273:
! 4274: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
! 4275: CC_ESC_WARN(env, (UChar* )"-");
! 4276: goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
! 4277: }
! 4278: r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
! 4279: goto err;
! 4280: }
! 4281: break;
! 4282:
! 4283: case TK_CC_CC_OPEN: /* [ */
! 4284: {
! 4285: Node *anode;
! 4286: CClassNode* acc;
! 4287:
! 4288: r = parse_char_class(&anode, tok, &p, end, env);
! 4289: if (r != 0) goto cc_open_err;
! 4290: acc = &(NCCLASS(anode));
! 4291: r = or_cclass(cc, acc, env->enc);
! 4292:
! 4293: onig_node_free(anode);
! 4294: cc_open_err:
! 4295: if (r != 0) goto err;
! 4296: }
! 4297: break;
! 4298:
! 4299: case TK_CC_AND: /* && */
! 4300: {
! 4301: if (state == CCS_VALUE) {
! 4302: r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
! 4303: &val_type, &state, env);
! 4304: if (r != 0) goto err;
! 4305: }
! 4306: /* initialize local variables */
! 4307: and_start = 1;
! 4308: state = CCS_START;
! 4309:
! 4310: if (IS_NOT_NULL(prev_cc)) {
! 4311: r = and_cclass(prev_cc, cc, env->enc);
! 4312: if (r != 0) goto err;
! 4313: bbuf_free(cc->mbuf);
! 4314: }
! 4315: else {
! 4316: prev_cc = cc;
! 4317: cc = &work_cc;
! 4318: }
! 4319: initialize_cclass(cc);
! 4320: }
! 4321: break;
! 4322:
! 4323: case TK_EOT:
! 4324: r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
! 4325: goto err;
! 4326: break;
! 4327: default:
! 4328: r = ONIGERR_PARSER_BUG;
! 4329: goto err;
! 4330: break;
! 4331: }
! 4332:
! 4333: if (fetched)
! 4334: r = tok->type;
! 4335: else {
! 4336: r = fetch_token_in_cc(tok, &p, end, env);
! 4337: if (r < 0) goto err;
! 4338: }
! 4339: }
! 4340:
! 4341: if (state == CCS_VALUE) {
! 4342: r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
! 4343: &val_type, &state, env);
! 4344: if (r != 0) goto err;
! 4345: }
! 4346:
! 4347: if (IS_NOT_NULL(prev_cc)) {
! 4348: r = and_cclass(prev_cc, cc, env->enc);
! 4349: if (r != 0) goto err;
! 4350: bbuf_free(cc->mbuf);
! 4351: cc = prev_cc;
! 4352: }
! 4353:
! 4354: if (neg != 0)
! 4355: CCLASS_SET_NOT(cc);
! 4356: else
! 4357: CCLASS_CLEAR_NOT(cc);
! 4358: if (IS_CCLASS_NOT(cc) &&
! 4359: IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
! 4360: int is_empty;
! 4361:
! 4362: is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
! 4363: if (is_empty != 0)
! 4364: BITSET_IS_EMPTY(cc->bs, is_empty);
! 4365:
! 4366: if (is_empty == 0) {
! 4367: #define NEWLINE_CODE 0x0a
! 4368:
! 4369: if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
! 4370: if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
! 4371: BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
! 4372: else
! 4373: add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
! 4374: }
! 4375: }
! 4376: }
! 4377: *src = p;
! 4378: return 0;
! 4379:
! 4380: err:
! 4381: if (cc != &(NCCLASS(*np)))
! 4382: bbuf_free(cc->mbuf);
! 4383: onig_node_free(*np);
! 4384: return r;
! 4385: }
! 4386:
! 4387: static int parse_subexp(Node** top, OnigToken* tok, int term,
! 4388: UChar** src, UChar* end, ScanEnv* env);
! 4389:
! 4390: static int
! 4391: parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
! 4392: ScanEnv* env)
! 4393: {
! 4394: int r, num;
! 4395: int list_capture;
! 4396: Node *target;
! 4397: OnigOptionType option;
! 4398: OnigEncoding enc = env->enc;
! 4399: OnigCodePoint c;
! 4400: UChar* p = *src;
! 4401: PFETCH_READY;
! 4402:
! 4403: *np = NULL;
! 4404: if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
! 4405:
! 4406: option = env->option;
! 4407: if (PPEEK_IS('?') &&
! 4408: IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
! 4409: PINC;
! 4410: if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
! 4411:
! 4412: PFETCH(c);
! 4413: switch (c) {
! 4414: case ':': /* (?:...) grouping only */
! 4415: group:
! 4416: r = fetch_token(tok, &p, end, env);
! 4417: if (r < 0) return r;
! 4418: r = parse_subexp(np, tok, term, &p, end, env);
! 4419: if (r < 0) return r;
! 4420: *src = p;
! 4421: return 1; /* group */
! 4422: break;
! 4423:
! 4424: case '=':
! 4425: *np = onig_node_new_anchor(ANCHOR_PREC_READ);
! 4426: break;
! 4427: case '!': /* preceding read */
! 4428: *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
! 4429: break;
! 4430: case '>': /* (?>...) stop backtrack */
! 4431: *np = node_new_effect(EFFECT_STOP_BACKTRACK);
! 4432: break;
! 4433:
! 4434: case '<': /* look behind (?<=...), (?<!...) */
! 4435: PFETCH(c);
! 4436: if (c == '=')
! 4437: *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
! 4438: else if (c == '!')
! 4439: *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
! 4440: #ifdef USE_NAMED_GROUP
! 4441: else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
! 4442: UChar *name;
! 4443: UChar *name_end;
! 4444:
! 4445: PUNFETCH;
! 4446: list_capture = 0;
! 4447:
! 4448: named_group:
! 4449: name = p;
! 4450: r = fetch_name(&p, end, &name_end, env, 0);
! 4451: if (r < 0) return r;
! 4452:
! 4453: num = scan_env_add_mem_entry(env);
! 4454: if (num < 0) return num;
! 4455: if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
! 4456: return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
! 4457:
! 4458: r = name_add(env->reg, name, name_end, num, env);
! 4459: if (r != 0) return r;
! 4460: *np = node_new_effect_memory(env->option, 1);
! 4461: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4462: NEFFECT(*np).regnum = num;
! 4463: if (list_capture != 0)
! 4464: BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
! 4465: env->num_named++;
! 4466: }
! 4467: #endif
! 4468: else
! 4469: return ONIGERR_UNDEFINED_GROUP_OPTION;
! 4470: break;
! 4471:
! 4472: case '@':
! 4473: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
! 4474: #ifdef USE_NAMED_GROUP
! 4475: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
! 4476: PFETCH(c);
! 4477: if (c == '<') {
! 4478: list_capture = 1;
! 4479: goto named_group; /* (?@<name>...) */
! 4480: }
! 4481: PUNFETCH;
! 4482: }
! 4483: #endif
! 4484: *np = node_new_effect_memory(env->option, 0);
! 4485: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4486: num = scan_env_add_mem_entry(env);
! 4487: if (num < 0) {
! 4488: onig_node_free(*np);
! 4489: return num;
! 4490: }
! 4491: else if (num >= BIT_STATUS_BITS_NUM) {
! 4492: onig_node_free(*np);
! 4493: return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
! 4494: }
! 4495: NEFFECT(*np).regnum = num;
! 4496: BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
! 4497: }
! 4498: else {
! 4499: return ONIGERR_UNDEFINED_GROUP_OPTION;
! 4500: }
! 4501: break;
! 4502:
! 4503: #ifdef USE_POSIXLINE_OPTION
! 4504: case 'p':
! 4505: #endif
! 4506: case '-': case 'i': case 'm': case 's': case 'x':
! 4507: {
! 4508: int neg = 0;
! 4509:
! 4510: while (1) {
! 4511: switch (c) {
! 4512: case ':':
! 4513: case ')':
! 4514: break;
! 4515:
! 4516: case '-': neg = 1; break;
! 4517: case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
! 4518: case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
! 4519: case 's':
! 4520: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
! 4521: ONOFF(option, ONIG_OPTION_MULTILINE, neg);
! 4522: }
! 4523: else
! 4524: return ONIGERR_UNDEFINED_GROUP_OPTION;
! 4525: break;
! 4526:
! 4527: case 'm':
! 4528: if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
! 4529: ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
! 4530: }
! 4531: else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
! 4532: ONOFF(option, ONIG_OPTION_MULTILINE, neg);
! 4533: }
! 4534: else
! 4535: return ONIGERR_UNDEFINED_GROUP_OPTION;
! 4536: break;
! 4537: #ifdef USE_POSIXLINE_OPTION
! 4538: case 'p':
! 4539: ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
! 4540: break;
! 4541: #endif
! 4542: default:
! 4543: return ONIGERR_UNDEFINED_GROUP_OPTION;
! 4544: }
! 4545:
! 4546: if (c == ')') {
! 4547: *np = node_new_option(option);
! 4548: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4549: *src = p;
! 4550: return 2; /* option only */
! 4551: }
! 4552: else if (c == ':') {
! 4553: OnigOptionType prev = env->option;
! 4554:
! 4555: env->option = option;
! 4556: r = fetch_token(tok, &p, end, env);
! 4557: if (r < 0) return r;
! 4558: r = parse_subexp(&target, tok, term, &p, end, env);
! 4559: env->option = prev;
! 4560: if (r < 0) return r;
! 4561: *np = node_new_option(option);
! 4562: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4563: NEFFECT(*np).target = target;
! 4564: *src = p;
! 4565: return 0;
! 4566: }
! 4567:
! 4568: if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
! 4569: PFETCH(c);
! 4570: }
! 4571: }
! 4572: break;
! 4573:
! 4574: default:
! 4575: return ONIGERR_UNDEFINED_GROUP_OPTION;
! 4576: }
! 4577: }
! 4578: else {
! 4579: if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
! 4580: goto group;
! 4581:
! 4582: *np = node_new_effect_memory(env->option, 0);
! 4583: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4584: num = scan_env_add_mem_entry(env);
! 4585: if (num < 0) return num;
! 4586: NEFFECT(*np).regnum = num;
! 4587: }
! 4588:
! 4589: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4590: r = fetch_token(tok, &p, end, env);
! 4591: if (r < 0) return r;
! 4592: r = parse_subexp(&target, tok, term, &p, end, env);
! 4593: if (r < 0) return r;
! 4594:
! 4595: if (NTYPE(*np) == N_ANCHOR)
! 4596: NANCHOR(*np).target = target;
! 4597: else {
! 4598: NEFFECT(*np).target = target;
! 4599: if (NEFFECT(*np).type == EFFECT_MEMORY) {
! 4600: /* Don't move this to previous of parse_subexp() */
! 4601: r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
! 4602: if (r != 0) return r;
! 4603: }
! 4604: }
! 4605:
! 4606: *src = p;
! 4607: return 0;
! 4608: }
! 4609:
/*
 * Text of each quantifier, indexed by the value popular_quantifier_num()
 * returns; used in the nested-repeat warning.
 */
static const char* PopularQStr[] = {
  "?",    /* 0 */
  "*",    /* 1 */
  "+",    /* 2 */
  "??",   /* 3 */
  "*?",   /* 4 */
  "+?"    /* 5 */
};

/*
 * Text of the replacement quantifier, indexed by a ReduceTypeTable entry;
 * used in the nested-repeat warning.
 */
static const char* ReduceQStr[] = {
  "",          /* 0 */
  "",          /* 1 */
  "*",         /* 2 */
  "*?",        /* 3 */
  "??",        /* 4 */
  "+ and ??",  /* 5 */
  "+? and ?"   /* 6 */
};
! 4617:
! 4618: static int
! 4619: set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
! 4620: {
! 4621: QuantifierNode* qn;
! 4622:
! 4623: qn = &(NQUANTIFIER(qnode));
! 4624: if (qn->lower == 1 && qn->upper == 1) {
! 4625: return 1;
! 4626: }
! 4627:
! 4628: switch (NTYPE(target)) {
! 4629: case N_STRING:
! 4630: if (! group) {
! 4631: StrNode* sn = &(NSTRING(target));
! 4632: if (str_node_can_be_split(sn, env->enc)) {
! 4633: Node* n = str_node_split_last_char(sn, env->enc);
! 4634: if (IS_NOT_NULL(n)) {
! 4635: qn->target = n;
! 4636: return 2;
! 4637: }
! 4638: }
! 4639: }
! 4640: break;
! 4641:
! 4642: case N_QUANTIFIER:
! 4643: { /* check redundant double repeat. */
! 4644: /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
! 4645: QuantifierNode* qnt = &(NQUANTIFIER(target));
! 4646: int nestq_num = popular_quantifier_num(qn);
! 4647: int targetq_num = popular_quantifier_num(qnt);
! 4648:
! 4649: #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
! 4650: if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
! 4651: IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
! 4652: UChar buf[WARN_BUFSIZE];
! 4653:
! 4654: switch(ReduceTypeTable[targetq_num][nestq_num]) {
! 4655: case RQ_ASIS:
! 4656: break;
! 4657:
! 4658: case RQ_DEL:
! 4659: if (onig_verb_warn != onig_null_warn) {
! 4660: onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
! 4661: env->pattern, env->pattern_end,
! 4662: (UChar* )"redundant nested repeat operator");
! 4663: (*onig_verb_warn)((char* )buf);
! 4664: }
! 4665: goto warn_exit;
! 4666: break;
! 4667:
! 4668: default:
! 4669: if (onig_verb_warn != onig_null_warn) {
! 4670: onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
! 4671: env->pattern, env->pattern_end,
! 4672: (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
! 4673: PopularQStr[targetq_num], PopularQStr[nestq_num],
! 4674: ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
! 4675: (*onig_verb_warn)((char* )buf);
! 4676: }
! 4677: goto warn_exit;
! 4678: break;
! 4679: }
! 4680: }
! 4681:
! 4682: warn_exit:
! 4683: #endif
! 4684: if (targetq_num >= 0) {
! 4685: if (nestq_num >= 0) {
! 4686: onig_reduce_nested_quantifier(qnode, target);
! 4687: goto q_exit;
! 4688: }
! 4689: else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
! 4690: /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
! 4691: if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
! 4692: qn->upper = (qn->lower == 0 ? 1 : qn->lower);
! 4693: }
! 4694: }
! 4695: }
! 4696: }
! 4697: break;
! 4698:
! 4699: default:
! 4700: break;
! 4701: }
! 4702:
! 4703: qn->target = target;
! 4704: q_exit:
! 4705: return 0;
! 4706: }
! 4707:
! 4708: #ifdef USE_SHARED_CCLASS_TABLE
! 4709:
! 4710: #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
! 4711:
! 4712: /* for ctype node hash table */
! 4713:
/* Lookup key for the shared character-class table (OnigTypeCClassTable).
   parse_exp() fills one in from the current encoding, the encoding-level
   ctype and the negation flag before looking up / inserting a class node. */
! 4714: typedef struct {
! 4715: OnigEncoding enc; /* encoding in effect (env->enc) */
! 4716: int not; /* negation flag produced by parse_ctype_to_enc_ctype() */
! 4717: int type; /* ctype value returned by parse_ctype_to_enc_ctype() */
! 4718: } type_cclass_key;
! 4719:
! 4720: static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
! 4721: {
! 4722: if (x->type != y->type) return 1;
! 4723: if (x->enc != y->enc) return 1;
! 4724: if (x->not != y->not) return 1;
! 4725: return 0;
! 4726: }
! 4727:
! 4728: static int type_cclass_hash(type_cclass_key* key)
! 4729: {
! 4730: int i, val;
! 4731: unsigned char *p;
! 4732:
! 4733: val = 0;
! 4734:
! 4735: p = (unsigned char* )&(key->enc);
! 4736: for (i = 0; i < sizeof(key->enc); i++) {
! 4737: val = val * 997 + (int )*p++;
! 4738: }
! 4739:
! 4740: p = (unsigned char* )(&key->type);
! 4741: for (i = 0; i < sizeof(key->type); i++) {
! 4742: val = val * 997 + (int )*p++;
! 4743: }
! 4744:
! 4745: val += key->not;
! 4746: return val + (val >> 5);
! 4747: }
! 4748:
/* Callback pair (key comparison, key hashing) handed to
   onig_st_init_table_with_size() when parse_exp() creates the shared
   character-class table. */
! 4749: static struct st_hash_type type_type_cclass_hash = {
! 4750: type_cclass_cmp,
! 4751: type_cclass_hash,
! 4752: };
! 4753:
/* Shared character-class table, keyed by type_cclass_key.  Created on demand
   by parse_exp() and reset to NULL by onig_free_shared_cclass_table(). */
! 4754: static st_table* OnigTypeCClassTable;
! 4755:
! 4756:
! 4757: static int
! 4758: i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
! 4759: {
! 4760: if (IS_NOT_NULL(node)) {
! 4761: CClassNode* cc = &(NCCLASS(node));
! 4762: if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
! 4763: xfree(node);
! 4764: }
! 4765:
! 4766: if (IS_NOT_NULL(key)) xfree(key);
! 4767: return ST_DELETE;
! 4768: }
! 4769:
! 4770: extern int
! 4771: onig_free_shared_cclass_table(void)
! 4772: {
! 4773: if (IS_NOT_NULL(OnigTypeCClassTable)) {
! 4774: onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
! 4775: onig_st_free_table(OnigTypeCClassTable);
! 4776: OnigTypeCClassTable = NULL;
! 4777: }
! 4778:
! 4779: return 0;
! 4780: }
! 4781:
! 4782: #endif /* USE_SHARED_CCLASS_TABLE */
! 4783:
! 4784:
! 4785: static int
! 4786: parse_exp(Node** np, OnigToken* tok, int term,
! 4787: UChar** src, UChar* end, ScanEnv* env)
! 4788: {
! 4789: int r, len, group = 0;
! 4790: Node* qn;
! 4791: Node** targetp;
! 4792:
! 4793: *np = NULL;
! 4794: if (tok->type == term)
! 4795: goto end_of_token;
! 4796:
! 4797: switch (tok->type) {
! 4798: case TK_ALT:
! 4799: case TK_EOT:
! 4800: end_of_token:
! 4801: *np = node_new_empty();
! 4802: return tok->type;
! 4803: break;
! 4804:
! 4805: case TK_SUBEXP_OPEN:
! 4806: r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env);
! 4807: if (r < 0) return r;
! 4808: if (r == 1) group = 1;
! 4809: else if (r == 2) { /* option only */
! 4810: Node* target;
! 4811: OnigOptionType prev = env->option;
! 4812:
! 4813: env->option = NEFFECT(*np).option;
! 4814: r = fetch_token(tok, src, end, env);
! 4815: if (r < 0) return r;
! 4816: r = parse_subexp(&target, tok, term, src, end, env);
! 4817: env->option = prev;
! 4818: if (r < 0) return r;
! 4819: NEFFECT(*np).target = target;
! 4820: return tok->type;
! 4821: }
! 4822: break;
! 4823:
! 4824: case TK_SUBEXP_CLOSE:
! 4825: if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
! 4826: return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
! 4827:
! 4828: if (tok->escaped) goto tk_raw_byte;
! 4829: else goto tk_byte;
! 4830: break;
! 4831:
! 4832: case TK_STRING:
! 4833: tk_byte:
! 4834: {
! 4835: *np = node_new_str(tok->backp, *src);
! 4836: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4837:
! 4838: while (1) {
! 4839: r = fetch_token(tok, src, end, env);
! 4840: if (r < 0) return r;
! 4841: if (r != TK_STRING) break;
! 4842:
! 4843: r = onig_node_str_cat(*np, tok->backp, *src);
! 4844: if (r < 0) return r;
! 4845: }
! 4846:
! 4847: string_end:
! 4848: targetp = np;
! 4849: goto repeat;
! 4850: }
! 4851: break;
! 4852:
! 4853: case TK_RAW_BYTE:
! 4854: tk_raw_byte:
! 4855: {
! 4856: *np = node_new_str_char((UChar )tok->u.c);
! 4857: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4858: len = 1;
! 4859: while (1) {
! 4860: if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
! 4861: if (len == enc_len(env->enc, NSTRING(*np).s)) {
! 4862: r = fetch_token(tok, src, end, env);
! 4863: goto string_end;
! 4864: }
! 4865: }
! 4866:
! 4867: r = fetch_token(tok, src, end, env);
! 4868: if (r < 0) return r;
! 4869: if (r != TK_RAW_BYTE) {
! 4870: #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
! 4871: int rem;
! 4872: if (len < ONIGENC_MBC_MINLEN(env->enc)) {
! 4873: rem = ONIGENC_MBC_MINLEN(env->enc) - len;
! 4874: (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
! 4875: if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
! 4876: goto string_end;
! 4877: }
! 4878: }
! 4879: #endif
! 4880: return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
! 4881: }
! 4882:
! 4883: r = node_str_cat_char(*np, (UChar )tok->u.c);
! 4884: if (r < 0) return r;
! 4885:
! 4886: len++;
! 4887: }
! 4888: }
! 4889: break;
! 4890:
! 4891: case TK_CODE_POINT:
! 4892: {
! 4893: UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
! 4894: int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
! 4895: if (num < 0) return num;
! 4896: #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
! 4897: *np = node_new_str_raw(buf, buf + num);
! 4898: #else
! 4899: *np = node_new_str(buf, buf + num);
! 4900: #endif
! 4901: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4902: }
! 4903: break;
! 4904:
! 4905: case TK_QUOTE_OPEN:
! 4906: {
! 4907: OnigCodePoint end_op[2];
! 4908: UChar *qstart, *qend, *nextp;
! 4909:
! 4910: end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
! 4911: end_op[1] = (OnigCodePoint )'E';
! 4912: qstart = *src;
! 4913: qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
! 4914: if (IS_NULL(qend)) {
! 4915: nextp = qend = end;
! 4916: }
! 4917: *np = node_new_str(qstart, qend);
! 4918: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4919: *src = nextp;
! 4920: }
! 4921: break;
! 4922:
! 4923: case TK_CHAR_TYPE:
! 4924: {
! 4925: switch (tok->u.subtype) {
! 4926: case CTYPE_WORD:
! 4927: case CTYPE_NOT_WORD:
! 4928: *np = node_new_ctype(tok->u.subtype);
! 4929: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4930: break;
! 4931:
! 4932: case CTYPE_WHITE_SPACE:
! 4933: case CTYPE_NOT_WHITE_SPACE:
! 4934: case CTYPE_DIGIT:
! 4935: case CTYPE_NOT_DIGIT:
! 4936: case CTYPE_XDIGIT:
! 4937: case CTYPE_NOT_XDIGIT:
! 4938: {
! 4939: CClassNode* cc;
! 4940: int ctype, not;
! 4941:
! 4942: #ifdef USE_SHARED_CCLASS_TABLE
! 4943: const OnigCodePoint *sbr, *mbr;
! 4944:
! 4945: ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
! 4946: r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
! 4947: if (r == 0 &&
! 4948: ONIGENC_CODE_RANGE_NUM(mbr)
! 4949: >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
! 4950: type_cclass_key key;
! 4951: type_cclass_key* new_key;
! 4952:
! 4953: key.enc = env->enc;
! 4954: key.not = not;
! 4955: key.type = ctype;
! 4956:
! 4957: THREAD_ATOMIC_START;
! 4958:
! 4959: if (IS_NULL(OnigTypeCClassTable)) {
! 4960: OnigTypeCClassTable
! 4961: = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
! 4962: if (IS_NULL(OnigTypeCClassTable)) {
! 4963: THREAD_ATOMIC_END;
! 4964: return ONIGERR_MEMORY;
! 4965: }
! 4966: }
! 4967: else {
! 4968: if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
! 4969: (st_data_t* )np)) {
! 4970: THREAD_ATOMIC_END;
! 4971: break;
! 4972: }
! 4973: }
! 4974:
! 4975: *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
! 4976: if (IS_NULL(*np)) {
! 4977: THREAD_ATOMIC_END;
! 4978: return ONIGERR_MEMORY;
! 4979: }
! 4980:
! 4981: CCLASS_SET_SHARE(&(NCCLASS(*np)));
! 4982: new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
! 4983: xmemcpy(new_key, &key, sizeof(type_cclass_key));
! 4984: onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
! 4985: (st_data_t )*np);
! 4986:
! 4987: THREAD_ATOMIC_END;
! 4988: }
! 4989: else {
! 4990: #endif
! 4991: ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
! 4992: *np = node_new_cclass();
! 4993: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 4994: cc = &(NCCLASS(*np));
! 4995: add_ctype_to_cc(cc, ctype, 0, env);
! 4996: if (not != 0) CCLASS_SET_NOT(cc);
! 4997: #ifdef USE_SHARED_CCLASS_TABLE
! 4998: }
! 4999: #endif
! 5000: }
! 5001: break;
! 5002:
! 5003: default:
! 5004: return ONIGERR_PARSER_BUG;
! 5005: break;
! 5006: }
! 5007: }
! 5008: break;
! 5009:
! 5010: case TK_CHAR_PROPERTY:
! 5011: r = parse_char_property(np, tok, src, end, env);
! 5012: if (r != 0) return r;
! 5013: break;
! 5014:
! 5015: case TK_CC_OPEN:
! 5016: {
! 5017: CClassNode* cc;
! 5018:
! 5019: r = parse_char_class(np, tok, src, end, env);
! 5020: if (r != 0) return r;
! 5021:
! 5022: cc = &(NCCLASS(*np));
! 5023:
! 5024: if (IS_IGNORECASE(env->option)) {
! 5025: int i, n, in_cc;
! 5026: const OnigPairAmbigCodes* ccs;
! 5027: BitSetRef bs = cc->bs;
! 5028: OnigAmbigType amb;
! 5029:
! 5030: for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
! 5031: if ((amb & env->ambig_flag) == 0) continue;
! 5032:
! 5033: n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
! 5034: for (i = 0; i < n; i++) {
! 5035: in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
! 5036:
! 5037: if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
! 5038: (in_cc == 0 && IS_CCLASS_NOT(cc))) {
! 5039: if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
! 5040: ccs[i].from >= SINGLE_BYTE_SIZE) {
! 5041: /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
! 5042: add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
! 5043: }
! 5044: else {
! 5045: if (BITSET_AT(bs, ccs[i].from)) {
! 5046: /* /(?i:[^A-C])/.match("a") ==> fail. */
! 5047: BITSET_SET_BIT(bs, ccs[i].to);
! 5048: }
! 5049: if (BITSET_AT(bs, ccs[i].to)) {
! 5050: BITSET_SET_BIT(bs, ccs[i].from);
! 5051: }
! 5052: }
! 5053: }
! 5054: }
! 5055: }
! 5056: }
! 5057: }
! 5058: break;
! 5059:
! 5060: case TK_ANYCHAR:
! 5061: *np = node_new_anychar();
! 5062: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 5063: break;
! 5064:
! 5065: case TK_ANYCHAR_ANYTIME:
! 5066: *np = node_new_anychar();
! 5067: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 5068: qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
! 5069: CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
! 5070: NQUANTIFIER(qn).target = *np;
! 5071: *np = qn;
! 5072: break;
! 5073:
! 5074: case TK_BACKREF:
! 5075: len = tok->u.backref.num;
! 5076: *np = node_new_backref(len,
! 5077: (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
! 5078: tok->u.backref.by_name,
! 5079: #ifdef USE_BACKREF_AT_LEVEL
! 5080: tok->u.backref.exist_level,
! 5081: tok->u.backref.level,
! 5082: #endif
! 5083: env);
! 5084: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 5085: break;
! 5086:
! 5087: #ifdef USE_SUBEXP_CALL
! 5088: case TK_CALL:
! 5089: *np = node_new_call(tok->u.call.name, tok->u.call.name_end);
! 5090: CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
! 5091: env->num_call++;
! 5092: break;
! 5093: #endif
! 5094:
! 5095: case TK_ANCHOR:
! 5096: *np = onig_node_new_anchor(tok->u.anchor);
! 5097: break;
! 5098:
! 5099: case TK_OP_REPEAT:
! 5100: case TK_INTERVAL:
! 5101: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
! 5102: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
! 5103: return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
! 5104: else
! 5105: *np = node_new_empty();
! 5106: }
! 5107: else {
! 5108: goto tk_byte;
! 5109: }
! 5110: break;
! 5111:
! 5112: default:
! 5113: return ONIGERR_PARSER_BUG;
! 5114: break;
! 5115: }
! 5116:
! 5117: {
! 5118: targetp = np;
! 5119:
! 5120: re_entry:
! 5121: r = fetch_token(tok, src, end, env);
! 5122: if (r < 0) return r;
! 5123:
! 5124: repeat:
! 5125: if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
! 5126: if (is_invalid_quantifier_target(*targetp))
! 5127: return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
! 5128:
! 5129: qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
! 5130: (r == TK_INTERVAL ? 1 : 0));
! 5131: CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
! 5132: NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
! 5133: r = set_quantifier(qn, *targetp, group, env);
! 5134: if (r < 0) return r;
! 5135:
! 5136: if (tok->u.repeat.possessive != 0) {
! 5137: Node* en;
! 5138: en = node_new_effect(EFFECT_STOP_BACKTRACK);
! 5139: CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
! 5140: NEFFECT(en).target = qn;
! 5141: qn = en;
! 5142: }
! 5143:
! 5144: if (r == 0) {
! 5145: *targetp = qn;
! 5146: }
! 5147: else if (r == 2) { /* split case: /abc+/ */
! 5148: Node *tmp;
! 5149:
! 5150: *targetp = node_new_list(*targetp, NULL);
! 5151: CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY);
! 5152: tmp = NCONS(*targetp).right = node_new_list(qn, NULL);
! 5153: CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY);
! 5154: targetp = &(NCONS(tmp).left);
! 5155: }
! 5156: goto re_entry;
! 5157: }
! 5158: }
! 5159:
! 5160: return r;
! 5161: }
! 5162:
! 5163: static int
! 5164: parse_branch(Node** top, OnigToken* tok, int term,
! 5165: UChar** src, UChar* end, ScanEnv* env)
! 5166: {
! 5167: int r;
! 5168: Node *node, **headp;
! 5169:
! 5170: *top = NULL;
! 5171: r = parse_exp(&node, tok, term, src, end, env);
! 5172: if (r < 0) return r;
! 5173:
! 5174: if (r == TK_EOT || r == term || r == TK_ALT) {
! 5175: *top = node;
! 5176: }
! 5177: else {
! 5178: *top = node_new_list(node, NULL);
! 5179: headp = &(NCONS(*top).right);
! 5180: while (r != TK_EOT && r != term && r != TK_ALT) {
! 5181: r = parse_exp(&node, tok, term, src, end, env);
! 5182: if (r < 0) return r;
! 5183:
! 5184: if (NTYPE(node) == N_LIST) {
! 5185: *headp = node;
! 5186: while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right;
! 5187: headp = &(NCONS(node).right);
! 5188: }
! 5189: else {
! 5190: *headp = node_new_list(node, NULL);
! 5191: headp = &(NCONS(*headp).right);
! 5192: }
! 5193: }
! 5194: }
! 5195:
! 5196: return r;
! 5197: }
! 5198:
! 5199: /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
! 5200: static int
! 5201: parse_subexp(Node** top, OnigToken* tok, int term,
! 5202: UChar** src, UChar* end, ScanEnv* env)
! 5203: {
! 5204: int r;
! 5205: Node *node, **headp;
! 5206:
! 5207: *top = NULL;
! 5208: r = parse_branch(&node, tok, term, src, end, env);
! 5209: if (r < 0) {
! 5210: onig_node_free(node);
! 5211: return r;
! 5212: }
! 5213:
! 5214: if (r == term) {
! 5215: *top = node;
! 5216: }
! 5217: else if (r == TK_ALT) {
! 5218: *top = node_new_alt(node, NULL);
! 5219: headp = &(NCONS(*top).right);
! 5220: while (r == TK_ALT) {
! 5221: r = fetch_token(tok, src, end, env);
! 5222: if (r < 0) return r;
! 5223: r = parse_branch(&node, tok, term, src, end, env);
! 5224: if (r < 0) return r;
! 5225:
! 5226: *headp = node_new_alt(node, NULL);
! 5227: headp = &(NCONS(*headp).right);
! 5228: }
! 5229:
! 5230: if (tok->type != term)
! 5231: goto err;
! 5232: }
! 5233: else {
! 5234: err:
! 5235: if (term == TK_SUBEXP_CLOSE)
! 5236: return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
! 5237: else
! 5238: return ONIGERR_PARSER_BUG;
! 5239: }
! 5240:
! 5241: return r;
! 5242: }
! 5243:
! 5244: static int
! 5245: parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
! 5246: {
! 5247: int r;
! 5248: OnigToken tok;
! 5249:
! 5250: r = fetch_token(&tok, src, end, env);
! 5251: if (r < 0) return r;
! 5252: r = parse_subexp(top, &tok, TK_EOT, src, end, env);
! 5253: if (r < 0) return r;
! 5254: return 0;
! 5255: }
! 5256:
! 5257: extern int
! 5258: onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg,
! 5259: ScanEnv* env)
! 5260: {
! 5261: int r;
! 5262: UChar* p;
! 5263:
! 5264: #ifdef USE_NAMED_GROUP
! 5265: names_clear(reg);
! 5266: #endif
! 5267:
! 5268: scan_env_clear(env);
! 5269: env->option = reg->options;
! 5270: env->ambig_flag = reg->ambig_flag;
! 5271: env->enc = reg->enc;
! 5272: env->syntax = reg->syntax;
! 5273: env->pattern = (UChar* )pattern;
! 5274: env->pattern_end = (UChar* )end;
! 5275: env->reg = reg;
! 5276:
! 5277: *root = NULL;
! 5278: p = (UChar* )pattern;
! 5279: r = parse_regexp(root, &p, (UChar* )end, env);
! 5280: reg->num_mem = env->num_mem;
! 5281: return r;
! 5282: }
! 5283:
! 5284: extern void
! 5285: onig_scan_env_set_error_string(ScanEnv* env, int ecode,
! 5286: UChar* arg, UChar* arg_end)
! 5287: {
! 5288: env->error = arg;
! 5289: env->error_end = arg_end;
! 5290: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>