Annotation of embedaddon/php/ext/mbstring/oniguruma/regcomp.c, revision 1.1
1.1 ! misho 1: /**********************************************************************
! 2: regcomp.c - Oniguruma (regular expression library)
! 3: **********************************************************************/
! 4: /*-
! 5: * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include "regparse.h"
! 31:
! 32: OnigAmbigType OnigDefaultAmbigFlag =
! 33: (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
! 34: ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
! 35:
! 36: extern OnigAmbigType
! 37: onig_get_default_ambig_flag(void)
! 38: {
! 39: return OnigDefaultAmbigFlag;
! 40: }
! 41:
! 42: extern int
! 43: onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
! 44: {
! 45: OnigDefaultAmbigFlag = ambig_flag;
! 46: return 0;
! 47: }
! 48:
! 49:
! 50: static UChar*
! 51: k_strdup(UChar* s, UChar* end)
! 52: {
! 53: int len = end - s;
! 54:
! 55: if (len > 0) {
! 56: UChar* r = (UChar* )xmalloc(len + 1);
! 57: CHECK_NULL_RETURN(r);
! 58: xmemcpy(r, s, len);
! 59: r[len] = (UChar )0;
! 60: return r;
! 61: }
! 62: else return NULL;
! 63: }
! 64:
! 65: /*
! 66: Caution: node should not be a string node.
! 67: (s and end member address break)
! 68: */
! 69: static void
! 70: swap_node(Node* a, Node* b)
! 71: {
! 72: Node c;
! 73: c = *a; *a = *b; *b = c;
! 74: }
! 75:
! 76: static OnigDistance
! 77: distance_add(OnigDistance d1, OnigDistance d2)
! 78: {
! 79: if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
! 80: return ONIG_INFINITE_DISTANCE;
! 81: else {
! 82: if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
! 83: else return ONIG_INFINITE_DISTANCE;
! 84: }
! 85: }
! 86:
! 87: static OnigDistance
! 88: distance_multiply(OnigDistance d, int m)
! 89: {
! 90: if (m == 0) return 0;
! 91:
! 92: if (d < ONIG_INFINITE_DISTANCE / m)
! 93: return d * m;
! 94: else
! 95: return ONIG_INFINITE_DISTANCE;
! 96: }
! 97:
! 98: static int
! 99: bitset_is_empty(BitSetRef bs)
! 100: {
! 101: int i;
! 102: for (i = 0; i < BITSET_SIZE; i++) {
! 103: if (bs[i] != 0) return 0;
! 104: }
! 105: return 1;
! 106: }
! 107:
! 108: #ifdef ONIG_DEBUG
! 109: static int
! 110: bitset_on_num(BitSetRef bs)
! 111: {
! 112: int i, n;
! 113:
! 114: n = 0;
! 115: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 116: if (BITSET_AT(bs, i)) n++;
! 117: }
! 118: return n;
! 119: }
! 120: #endif
! 121:
! 122: extern int
! 123: onig_bbuf_init(BBuf* buf, int size)
! 124: {
! 125: buf->p = (UChar* )xmalloc(size);
! 126: if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
! 127:
! 128: buf->alloc = size;
! 129: buf->used = 0;
! 130: return 0;
! 131: }
! 132:
! 133:
! 134: #ifdef USE_SUBEXP_CALL
! 135:
! 136: static int
! 137: unset_addr_list_init(UnsetAddrList* uslist, int size)
! 138: {
! 139: UnsetAddr* p;
! 140:
! 141: p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
! 142: CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
! 143: uslist->num = 0;
! 144: uslist->alloc = size;
! 145: uslist->us = p;
! 146: return 0;
! 147: }
! 148:
! 149: static void
! 150: unset_addr_list_end(UnsetAddrList* uslist)
! 151: {
! 152: if (IS_NOT_NULL(uslist->us))
! 153: xfree(uslist->us);
! 154: }
! 155:
! 156: static int
! 157: unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
! 158: {
! 159: UnsetAddr* p;
! 160: int size;
! 161:
! 162: if (uslist->num >= uslist->alloc) {
! 163: size = uslist->alloc * 2;
! 164: p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
! 165: CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
! 166: uslist->alloc = size;
! 167: uslist->us = p;
! 168: }
! 169:
! 170: uslist->us[uslist->num].offset = offset;
! 171: uslist->us[uslist->num].target = node;
! 172: uslist->num++;
! 173: return 0;
! 174: }
! 175: #endif /* USE_SUBEXP_CALL */
! 176:
! 177:
! 178: static int
! 179: add_opcode(regex_t* reg, int opcode)
! 180: {
! 181: BBUF_ADD1(reg, opcode);
! 182: return 0;
! 183: }
! 184:
! 185: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 186: static int
! 187: add_state_check_num(regex_t* reg, int num)
! 188: {
! 189: StateCheckNumType n = (StateCheckNumType )num;
! 190:
! 191: BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
! 192: return 0;
! 193: }
! 194: #endif
! 195:
! 196: static int
! 197: add_rel_addr(regex_t* reg, int addr)
! 198: {
! 199: RelAddrType ra = (RelAddrType )addr;
! 200:
! 201: BBUF_ADD(reg, &ra, SIZE_RELADDR);
! 202: return 0;
! 203: }
! 204:
! 205: static int
! 206: add_abs_addr(regex_t* reg, int addr)
! 207: {
! 208: AbsAddrType ra = (AbsAddrType )addr;
! 209:
! 210: BBUF_ADD(reg, &ra, SIZE_ABSADDR);
! 211: return 0;
! 212: }
! 213:
! 214: static int
! 215: add_length(regex_t* reg, int len)
! 216: {
! 217: LengthType l = (LengthType )len;
! 218:
! 219: BBUF_ADD(reg, &l, SIZE_LENGTH);
! 220: return 0;
! 221: }
! 222:
! 223: static int
! 224: add_mem_num(regex_t* reg, int num)
! 225: {
! 226: MemNumType n = (MemNumType )num;
! 227:
! 228: BBUF_ADD(reg, &n, SIZE_MEMNUM);
! 229: return 0;
! 230: }
! 231:
! 232: static int
! 233: add_pointer(regex_t* reg, void* addr)
! 234: {
! 235: PointerType ptr = (PointerType )addr;
! 236:
! 237: BBUF_ADD(reg, &ptr, SIZE_POINTER);
! 238: return 0;
! 239: }
! 240:
! 241: static int
! 242: add_option(regex_t* reg, OnigOptionType option)
! 243: {
! 244: BBUF_ADD(reg, &option, SIZE_OPTION);
! 245: return 0;
! 246: }
! 247:
! 248: static int
! 249: add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
! 250: {
! 251: int r;
! 252:
! 253: r = add_opcode(reg, opcode);
! 254: if (r) return r;
! 255: r = add_rel_addr(reg, addr);
! 256: return r;
! 257: }
! 258:
! 259: static int
! 260: add_bytes(regex_t* reg, UChar* bytes, int len)
! 261: {
! 262: BBUF_ADD(reg, bytes, len);
! 263: return 0;
! 264: }
! 265:
! 266: static int
! 267: add_bitset(regex_t* reg, BitSetRef bs)
! 268: {
! 269: BBUF_ADD(reg, bs, SIZE_BITSET);
! 270: return 0;
! 271: }
! 272:
! 273: static int
! 274: add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
! 275: {
! 276: int r;
! 277:
! 278: r = add_opcode(reg, opcode);
! 279: if (r) return r;
! 280: r = add_option(reg, option);
! 281: return r;
! 282: }
! 283:
! 284: static int compile_length_tree(Node* node, regex_t* reg);
! 285: static int compile_tree(Node* node, regex_t* reg);
! 286:
! 287:
! 288: #define IS_NEED_STR_LEN_OP_EXACT(op) \
! 289: ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
! 290: (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
! 291:
! 292: static int
! 293: select_str_opcode(int mb_len, int str_len, int ignore_case)
! 294: {
! 295: int op;
! 296:
! 297: if (ignore_case) {
! 298: switch (str_len) {
! 299: case 1: op = OP_EXACT1_IC; break;
! 300: default: op = OP_EXACTN_IC; break;
! 301: }
! 302: }
! 303: else {
! 304: switch (mb_len) {
! 305: case 1:
! 306: switch (str_len) {
! 307: case 1: op = OP_EXACT1; break;
! 308: case 2: op = OP_EXACT2; break;
! 309: case 3: op = OP_EXACT3; break;
! 310: case 4: op = OP_EXACT4; break;
! 311: case 5: op = OP_EXACT5; break;
! 312: default: op = OP_EXACTN; break;
! 313: }
! 314: break;
! 315:
! 316: case 2:
! 317: switch (str_len) {
! 318: case 1: op = OP_EXACTMB2N1; break;
! 319: case 2: op = OP_EXACTMB2N2; break;
! 320: case 3: op = OP_EXACTMB2N3; break;
! 321: default: op = OP_EXACTMB2N; break;
! 322: }
! 323: break;
! 324:
! 325: case 3:
! 326: op = OP_EXACTMB3N;
! 327: break;
! 328:
! 329: default:
! 330: op = OP_EXACTMBN;
! 331: break;
! 332: }
! 333: }
! 334: return op;
! 335: }
! 336:
! 337: static int
! 338: compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
! 339: {
! 340: int r;
! 341: int saved_num_null_check = reg->num_null_check;
! 342:
! 343: if (empty_info != 0) {
! 344: r = add_opcode(reg, OP_NULL_CHECK_START);
! 345: if (r) return r;
! 346: r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
! 347: if (r) return r;
! 348: reg->num_null_check++;
! 349: }
! 350:
! 351: r = compile_tree(node, reg);
! 352: if (r) return r;
! 353:
! 354: if (empty_info != 0) {
! 355: if (empty_info == NQ_TARGET_IS_EMPTY)
! 356: r = add_opcode(reg, OP_NULL_CHECK_END);
! 357: else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
! 358: r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
! 359: else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
! 360: r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
! 361:
! 362: if (r) return r;
! 363: r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
! 364: }
! 365: return r;
! 366: }
! 367:
! 368: #ifdef USE_SUBEXP_CALL
! 369: static int
! 370: compile_call(CallNode* node, regex_t* reg)
! 371: {
! 372: int r;
! 373:
! 374: r = add_opcode(reg, OP_CALL);
! 375: if (r) return r;
! 376: r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
! 377: node->target);
! 378: if (r) return r;
! 379: r = add_abs_addr(reg, 0 /*dummy addr.*/);
! 380: return r;
! 381: }
! 382: #endif
! 383:
! 384: static int
! 385: compile_tree_n_times(Node* node, int n, regex_t* reg)
! 386: {
! 387: int i, r;
! 388:
! 389: for (i = 0; i < n; i++) {
! 390: r = compile_tree(node, reg);
! 391: if (r) return r;
! 392: }
! 393: return 0;
! 394: }
! 395:
! 396: static int
! 397: add_compile_string_length(UChar* s, int mb_len, int str_len,
! 398: regex_t* reg, int ignore_case)
! 399: {
! 400: int len;
! 401: int op = select_str_opcode(mb_len, str_len, ignore_case);
! 402:
! 403: len = SIZE_OPCODE;
! 404:
! 405: if (op == OP_EXACTMBN) len += SIZE_LENGTH;
! 406: if (IS_NEED_STR_LEN_OP_EXACT(op))
! 407: len += SIZE_LENGTH;
! 408:
! 409: len += mb_len * str_len;
! 410: return len;
! 411: }
! 412:
! 413: static int
! 414: add_compile_string(UChar* s, int mb_len, int str_len,
! 415: regex_t* reg, int ignore_case)
! 416: {
! 417: int op = select_str_opcode(mb_len, str_len, ignore_case);
! 418: add_opcode(reg, op);
! 419:
! 420: if (op == OP_EXACTMBN)
! 421: add_length(reg, mb_len);
! 422:
! 423: if (IS_NEED_STR_LEN_OP_EXACT(op)) {
! 424: if (op == OP_EXACTN_IC)
! 425: add_length(reg, mb_len * str_len);
! 426: else
! 427: add_length(reg, str_len);
! 428: }
! 429:
! 430: add_bytes(reg, s, mb_len * str_len);
! 431: return 0;
! 432: }
! 433:
! 434:
! 435: static int
! 436: compile_length_string_node(Node* node, regex_t* reg)
! 437: {
! 438: int rlen, r, len, prev_len, slen, ambig;
! 439: OnigEncoding enc = reg->enc;
! 440: UChar *p, *prev;
! 441: StrNode* sn;
! 442:
! 443: sn = &(NSTRING(node));
! 444: if (sn->end <= sn->s)
! 445: return 0;
! 446:
! 447: ambig = NSTRING_IS_AMBIG(node);
! 448:
! 449: p = prev = sn->s;
! 450: prev_len = enc_len(enc, p);
! 451: p += prev_len;
! 452: slen = 1;
! 453: rlen = 0;
! 454:
! 455: for (; p < sn->end; ) {
! 456: len = enc_len(enc, p);
! 457: if (len == prev_len) {
! 458: slen++;
! 459: }
! 460: else {
! 461: r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
! 462: rlen += r;
! 463: prev = p;
! 464: slen = 1;
! 465: prev_len = len;
! 466: }
! 467: p += len;
! 468: }
! 469: r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
! 470: rlen += r;
! 471: return rlen;
! 472: }
! 473:
! 474: static int
! 475: compile_length_string_raw_node(StrNode* sn, regex_t* reg)
! 476: {
! 477: if (sn->end <= sn->s)
! 478: return 0;
! 479:
! 480: return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
! 481: }
! 482:
! 483: static int
! 484: compile_string_node(Node* node, regex_t* reg)
! 485: {
! 486: int r, len, prev_len, slen, ambig;
! 487: OnigEncoding enc = reg->enc;
! 488: UChar *p, *prev, *end;
! 489: StrNode* sn;
! 490:
! 491: sn = &(NSTRING(node));
! 492: if (sn->end <= sn->s)
! 493: return 0;
! 494:
! 495: end = sn->end;
! 496: ambig = NSTRING_IS_AMBIG(node);
! 497:
! 498: p = prev = sn->s;
! 499: prev_len = enc_len(enc, p);
! 500: p += prev_len;
! 501: slen = 1;
! 502:
! 503: for (; p < end; ) {
! 504: len = enc_len(enc, p);
! 505: if (len == prev_len) {
! 506: slen++;
! 507: }
! 508: else {
! 509: r = add_compile_string(prev, prev_len, slen, reg, ambig);
! 510: if (r) return r;
! 511:
! 512: prev = p;
! 513: slen = 1;
! 514: prev_len = len;
! 515: }
! 516:
! 517: p += len;
! 518: }
! 519: return add_compile_string(prev, prev_len, slen, reg, ambig);
! 520: }
! 521:
! 522: static int
! 523: compile_string_raw_node(StrNode* sn, regex_t* reg)
! 524: {
! 525: if (sn->end <= sn->s)
! 526: return 0;
! 527:
! 528: return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
! 529: }
! 530:
! 531: static int
! 532: add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
! 533: {
! 534: #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
! 535: add_length(reg, mbuf->used);
! 536: return add_bytes(reg, mbuf->p, mbuf->used);
! 537: #else
! 538: static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
! 539:
! 540: int r, pad_size;
! 541: UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
! 542:
! 543: GET_ALIGNMENT_PAD_SIZE(p, pad_size);
! 544: add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
! 545: if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
! 546:
! 547: r = add_bytes(reg, mbuf->p, mbuf->used);
! 548:
! 549: /* padding for return value from compile_length_cclass_node() to be fix. */
! 550: pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
! 551: if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
! 552: return r;
! 553: #endif
! 554: }
! 555:
! 556: static int
! 557: compile_length_cclass_node(CClassNode* cc, regex_t* reg)
! 558: {
! 559: int len;
! 560:
! 561: if (IS_CCLASS_SHARE(cc)) {
! 562: len = SIZE_OPCODE + SIZE_POINTER;
! 563: return len;
! 564: }
! 565:
! 566: if (IS_NULL(cc->mbuf)) {
! 567: len = SIZE_OPCODE + SIZE_BITSET;
! 568: }
! 569: else {
! 570: if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
! 571: len = SIZE_OPCODE;
! 572: }
! 573: else {
! 574: len = SIZE_OPCODE + SIZE_BITSET;
! 575: }
! 576: #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
! 577: len += SIZE_LENGTH + cc->mbuf->used;
! 578: #else
! 579: len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
! 580: #endif
! 581: }
! 582:
! 583: return len;
! 584: }
! 585:
! 586: static int
! 587: compile_cclass_node(CClassNode* cc, regex_t* reg)
! 588: {
! 589: int r;
! 590:
! 591: if (IS_CCLASS_SHARE(cc)) {
! 592: add_opcode(reg, OP_CCLASS_NODE);
! 593: r = add_pointer(reg, cc);
! 594: return r;
! 595: }
! 596:
! 597: if (IS_NULL(cc->mbuf)) {
! 598: if (IS_CCLASS_NOT(cc))
! 599: add_opcode(reg, OP_CCLASS_NOT);
! 600: else
! 601: add_opcode(reg, OP_CCLASS);
! 602:
! 603: r = add_bitset(reg, cc->bs);
! 604: }
! 605: else {
! 606: if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
! 607: if (IS_CCLASS_NOT(cc))
! 608: add_opcode(reg, OP_CCLASS_MB_NOT);
! 609: else
! 610: add_opcode(reg, OP_CCLASS_MB);
! 611:
! 612: r = add_multi_byte_cclass(cc->mbuf, reg);
! 613: }
! 614: else {
! 615: if (IS_CCLASS_NOT(cc))
! 616: add_opcode(reg, OP_CCLASS_MIX_NOT);
! 617: else
! 618: add_opcode(reg, OP_CCLASS_MIX);
! 619:
! 620: r = add_bitset(reg, cc->bs);
! 621: if (r) return r;
! 622: r = add_multi_byte_cclass(cc->mbuf, reg);
! 623: }
! 624: }
! 625:
! 626: return r;
! 627: }
! 628:
! 629: static int
! 630: entry_repeat_range(regex_t* reg, int id, int lower, int upper)
! 631: {
! 632: #define REPEAT_RANGE_ALLOC 4
! 633:
! 634: OnigRepeatRange* p;
! 635:
! 636: if (reg->repeat_range_alloc == 0) {
! 637: p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
! 638: CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
! 639: reg->repeat_range = p;
! 640: reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
! 641: }
! 642: else if (reg->repeat_range_alloc <= id) {
! 643: int n;
! 644: n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
! 645: p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
! 646: sizeof(OnigRepeatRange) * n);
! 647: CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
! 648: reg->repeat_range = p;
! 649: reg->repeat_range_alloc = n;
! 650: }
! 651: else {
! 652: p = reg->repeat_range;
! 653: }
! 654:
! 655: p[id].lower = lower;
! 656: p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
! 657: return 0;
! 658: }
! 659:
! 660: static int
! 661: compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
! 662: regex_t* reg)
! 663: {
! 664: int r;
! 665: int num_repeat = reg->num_repeat;
! 666:
! 667: r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
! 668: if (r) return r;
! 669: r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
! 670: reg->num_repeat++;
! 671: if (r) return r;
! 672: r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
! 673: if (r) return r;
! 674:
! 675: r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
! 676: if (r) return r;
! 677:
! 678: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 679: if (r) return r;
! 680:
! 681: if (
! 682: #ifdef USE_SUBEXP_CALL
! 683: reg->num_call > 0 ||
! 684: #endif
! 685: IS_QUANTIFIER_IN_REPEAT(qn)) {
! 686: r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
! 687: }
! 688: else {
! 689: r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
! 690: }
! 691: if (r) return r;
! 692: r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
! 693: return r;
! 694: }
! 695:
! 696: static int
! 697: is_anychar_star_quantifier(QuantifierNode* qn)
! 698: {
! 699: if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
! 700: NTYPE(qn->target) == N_ANYCHAR)
! 701: return 1;
! 702: else
! 703: return 0;
! 704: }
! 705:
! 706: #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
! 707: #define CKN_ON (ckn > 0)
! 708:
! 709: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 710:
! 711: static int
! 712: compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
! 713: {
! 714: int len, mod_tlen, cklen;
! 715: int ckn;
! 716: int infinite = IS_REPEAT_INFINITE(qn->upper);
! 717: int empty_info = qn->target_empty_info;
! 718: int tlen = compile_length_tree(qn->target, reg);
! 719:
! 720: if (tlen < 0) return tlen;
! 721:
! 722: ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
! 723:
! 724: cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
! 725:
! 726: /* anychar repeat */
! 727: if (NTYPE(qn->target) == N_ANYCHAR) {
! 728: if (qn->greedy && infinite) {
! 729: if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
! 730: return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
! 731: else
! 732: return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
! 733: }
! 734: }
! 735:
! 736: if (empty_info != 0)
! 737: mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
! 738: else
! 739: mod_tlen = tlen;
! 740:
! 741: if (infinite && qn->lower <= 1) {
! 742: if (qn->greedy) {
! 743: if (qn->lower == 1)
! 744: len = SIZE_OP_JUMP;
! 745: else
! 746: len = 0;
! 747:
! 748: len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
! 749: }
! 750: else {
! 751: if (qn->lower == 0)
! 752: len = SIZE_OP_JUMP;
! 753: else
! 754: len = 0;
! 755:
! 756: len += mod_tlen + SIZE_OP_PUSH + cklen;
! 757: }
! 758: }
! 759: else if (qn->upper == 0) {
! 760: if (qn->is_refered != 0) /* /(?<n>..){0}/ */
! 761: len = SIZE_OP_JUMP + tlen;
! 762: else
! 763: len = 0;
! 764: }
! 765: else if (qn->upper == 1 && qn->greedy) {
! 766: if (qn->lower == 0) {
! 767: if (CKN_ON) {
! 768: len = SIZE_OP_STATE_CHECK_PUSH + tlen;
! 769: }
! 770: else {
! 771: len = SIZE_OP_PUSH + tlen;
! 772: }
! 773: }
! 774: else {
! 775: len = tlen;
! 776: }
! 777: }
! 778: else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
! 779: len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
! 780: }
! 781: else {
! 782: len = SIZE_OP_REPEAT_INC
! 783: + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
! 784: if (CKN_ON)
! 785: len += SIZE_OP_STATE_CHECK;
! 786: }
! 787:
! 788: return len;
! 789: }
! 790:
! 791: static int
! 792: compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
! 793: {
! 794: int r, mod_tlen;
! 795: int ckn;
! 796: int infinite = IS_REPEAT_INFINITE(qn->upper);
! 797: int empty_info = qn->target_empty_info;
! 798: int tlen = compile_length_tree(qn->target, reg);
! 799:
! 800: if (tlen < 0) return tlen;
! 801:
! 802: ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
! 803:
! 804: if (is_anychar_star_quantifier(qn)) {
! 805: r = compile_tree_n_times(qn->target, qn->lower, reg);
! 806: if (r) return r;
! 807: if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
! 808: if (IS_MULTILINE(reg->options))
! 809: r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
! 810: else
! 811: r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
! 812: if (r) return r;
! 813: if (CKN_ON) {
! 814: r = add_state_check_num(reg, ckn);
! 815: if (r) return r;
! 816: }
! 817:
! 818: return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
! 819: }
! 820: else {
! 821: if (IS_MULTILINE(reg->options)) {
! 822: r = add_opcode(reg, (CKN_ON ?
! 823: OP_STATE_CHECK_ANYCHAR_ML_STAR
! 824: : OP_ANYCHAR_ML_STAR));
! 825: }
! 826: else {
! 827: r = add_opcode(reg, (CKN_ON ?
! 828: OP_STATE_CHECK_ANYCHAR_STAR
! 829: : OP_ANYCHAR_STAR));
! 830: }
! 831: if (r) return r;
! 832: if (CKN_ON)
! 833: r = add_state_check_num(reg, ckn);
! 834:
! 835: return r;
! 836: }
! 837: }
! 838:
! 839: if (empty_info != 0)
! 840: mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
! 841: else
! 842: mod_tlen = tlen;
! 843:
! 844: if (infinite && qn->lower <= 1) {
! 845: if (qn->greedy) {
! 846: if (qn->lower == 1) {
! 847: r = add_opcode_rel_addr(reg, OP_JUMP,
! 848: (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
! 849: if (r) return r;
! 850: }
! 851:
! 852: if (CKN_ON) {
! 853: r = add_opcode(reg, OP_STATE_CHECK_PUSH);
! 854: if (r) return r;
! 855: r = add_state_check_num(reg, ckn);
! 856: if (r) return r;
! 857: r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
! 858: }
! 859: else {
! 860: r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
! 861: }
! 862: if (r) return r;
! 863: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 864: if (r) return r;
! 865: r = add_opcode_rel_addr(reg, OP_JUMP,
! 866: -(mod_tlen + (int )SIZE_OP_JUMP
! 867: + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
! 868: }
! 869: else {
! 870: if (qn->lower == 0) {
! 871: r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
! 872: if (r) return r;
! 873: }
! 874: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 875: if (r) return r;
! 876: if (CKN_ON) {
! 877: r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
! 878: if (r) return r;
! 879: r = add_state_check_num(reg, ckn);
! 880: if (r) return r;
! 881: r = add_rel_addr(reg,
! 882: -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
! 883: }
! 884: else
! 885: r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
! 886: }
! 887: }
! 888: else if (qn->upper == 0) {
! 889: if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
! 890: r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
! 891: if (r) return r;
! 892: r = compile_tree(qn->target, reg);
! 893: }
! 894: else
! 895: r = 0;
! 896: }
! 897: else if (qn->upper == 1 && qn->greedy) {
! 898: if (qn->lower == 0) {
! 899: if (CKN_ON) {
! 900: r = add_opcode(reg, OP_STATE_CHECK_PUSH);
! 901: if (r) return r;
! 902: r = add_state_check_num(reg, ckn);
! 903: if (r) return r;
! 904: r = add_rel_addr(reg, tlen);
! 905: }
! 906: else {
! 907: r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
! 908: }
! 909: if (r) return r;
! 910: }
! 911:
! 912: r = compile_tree(qn->target, reg);
! 913: }
! 914: else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
! 915: if (CKN_ON) {
! 916: r = add_opcode(reg, OP_STATE_CHECK_PUSH);
! 917: if (r) return r;
! 918: r = add_state_check_num(reg, ckn);
! 919: if (r) return r;
! 920: r = add_rel_addr(reg, SIZE_OP_JUMP);
! 921: }
! 922: else {
! 923: r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
! 924: }
! 925:
! 926: if (r) return r;
! 927: r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
! 928: if (r) return r;
! 929: r = compile_tree(qn->target, reg);
! 930: }
! 931: else {
! 932: r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
! 933: if (CKN_ON) {
! 934: if (r) return r;
! 935: r = add_opcode(reg, OP_STATE_CHECK);
! 936: if (r) return r;
! 937: r = add_state_check_num(reg, ckn);
! 938: }
! 939: }
! 940: return r;
! 941: }
! 942:
! 943: #else /* USE_COMBINATION_EXPLOSION_CHECK */
! 944:
! 945: static int
! 946: compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
! 947: {
! 948: int len, mod_tlen;
! 949: int infinite = IS_REPEAT_INFINITE(qn->upper);
! 950: int empty_info = qn->target_empty_info;
! 951: int tlen = compile_length_tree(qn->target, reg);
! 952:
! 953: if (tlen < 0) return tlen;
! 954:
! 955: /* anychar repeat */
! 956: if (NTYPE(qn->target) == N_ANYCHAR) {
! 957: if (qn->greedy && infinite) {
! 958: if (IS_NOT_NULL(qn->next_head_exact))
! 959: return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
! 960: else
! 961: return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
! 962: }
! 963: }
! 964:
! 965: if (empty_info != 0)
! 966: mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
! 967: else
! 968: mod_tlen = tlen;
! 969:
! 970: if (infinite &&
! 971: (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
! 972: if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
! 973: len = SIZE_OP_JUMP;
! 974: }
! 975: else {
! 976: len = tlen * qn->lower;
! 977: }
! 978:
! 979: if (qn->greedy) {
! 980: if (IS_NOT_NULL(qn->head_exact))
! 981: len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
! 982: else if (IS_NOT_NULL(qn->next_head_exact))
! 983: len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
! 984: else
! 985: len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
! 986: }
! 987: else
! 988: len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
! 989: }
! 990: else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
! 991: len = SIZE_OP_JUMP + tlen;
! 992: }
! 993: else if (!infinite && qn->greedy &&
! 994: (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
! 995: <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
! 996: len = tlen * qn->lower;
! 997: len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
! 998: }
! 999: else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
! 1000: len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
! 1001: }
! 1002: else {
! 1003: len = SIZE_OP_REPEAT_INC
! 1004: + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
! 1005: }
! 1006:
! 1007: return len;
! 1008: }
! 1009:
! 1010: static int
! 1011: compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
! 1012: {
! 1013: int i, r, mod_tlen;
! 1014: int infinite = IS_REPEAT_INFINITE(qn->upper);
! 1015: int empty_info = qn->target_empty_info;
! 1016: int tlen = compile_length_tree(qn->target, reg);
! 1017:
! 1018: if (tlen < 0) return tlen;
! 1019:
! 1020: if (is_anychar_star_quantifier(qn)) {
! 1021: r = compile_tree_n_times(qn->target, qn->lower, reg);
! 1022: if (r) return r;
! 1023: if (IS_NOT_NULL(qn->next_head_exact)) {
! 1024: if (IS_MULTILINE(reg->options))
! 1025: r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
! 1026: else
! 1027: r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
! 1028: if (r) return r;
! 1029: return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
! 1030: }
! 1031: else {
! 1032: if (IS_MULTILINE(reg->options))
! 1033: return add_opcode(reg, OP_ANYCHAR_ML_STAR);
! 1034: else
! 1035: return add_opcode(reg, OP_ANYCHAR_STAR);
! 1036: }
! 1037: }
! 1038:
! 1039: if (empty_info != 0)
! 1040: mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
! 1041: else
! 1042: mod_tlen = tlen;
! 1043:
! 1044: if (infinite &&
! 1045: (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
! 1046: if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
! 1047: if (qn->greedy) {
! 1048: if (IS_NOT_NULL(qn->head_exact))
! 1049: r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
! 1050: else if (IS_NOT_NULL(qn->next_head_exact))
! 1051: r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
! 1052: else
! 1053: r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
! 1054: }
! 1055: else {
! 1056: r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
! 1057: }
! 1058: if (r) return r;
! 1059: }
! 1060: else {
! 1061: r = compile_tree_n_times(qn->target, qn->lower, reg);
! 1062: if (r) return r;
! 1063: }
! 1064:
! 1065: if (qn->greedy) {
! 1066: if (IS_NOT_NULL(qn->head_exact)) {
! 1067: r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
! 1068: mod_tlen + SIZE_OP_JUMP);
! 1069: if (r) return r;
! 1070: add_bytes(reg, NSTRING(qn->head_exact).s, 1);
! 1071: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 1072: if (r) return r;
! 1073: r = add_opcode_rel_addr(reg, OP_JUMP,
! 1074: -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
! 1075: }
! 1076: else if (IS_NOT_NULL(qn->next_head_exact)) {
! 1077: r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
! 1078: mod_tlen + SIZE_OP_JUMP);
! 1079: if (r) return r;
! 1080: add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
! 1081: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 1082: if (r) return r;
! 1083: r = add_opcode_rel_addr(reg, OP_JUMP,
! 1084: -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
! 1085: }
! 1086: else {
! 1087: r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
! 1088: if (r) return r;
! 1089: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 1090: if (r) return r;
! 1091: r = add_opcode_rel_addr(reg, OP_JUMP,
! 1092: -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
! 1093: }
! 1094: }
! 1095: else {
! 1096: r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
! 1097: if (r) return r;
! 1098: r = compile_tree_empty_check(qn->target, reg, empty_info);
! 1099: if (r) return r;
! 1100: r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
! 1101: }
! 1102: }
! 1103: else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
! 1104: r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
! 1105: if (r) return r;
! 1106: r = compile_tree(qn->target, reg);
! 1107: }
! 1108: else if (!infinite && qn->greedy &&
! 1109: (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
! 1110: <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
! 1111: int n = qn->upper - qn->lower;
! 1112:
! 1113: r = compile_tree_n_times(qn->target, qn->lower, reg);
! 1114: if (r) return r;
! 1115:
! 1116: for (i = 0; i < n; i++) {
! 1117: r = add_opcode_rel_addr(reg, OP_PUSH,
! 1118: (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
! 1119: if (r) return r;
! 1120: r = compile_tree(qn->target, reg);
! 1121: if (r) return r;
! 1122: }
! 1123: }
! 1124: else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
! 1125: r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
! 1126: if (r) return r;
! 1127: r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
! 1128: if (r) return r;
! 1129: r = compile_tree(qn->target, reg);
! 1130: }
! 1131: else {
! 1132: r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
! 1133: }
! 1134: return r;
! 1135: }
! 1136: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
! 1137:
! 1138: static int
! 1139: compile_length_option_node(EffectNode* node, regex_t* reg)
! 1140: {
! 1141: int tlen;
! 1142: OnigOptionType prev = reg->options;
! 1143:
! 1144: reg->options = node->option;
! 1145: tlen = compile_length_tree(node->target, reg);
! 1146: reg->options = prev;
! 1147:
! 1148: if (tlen < 0) return tlen;
! 1149:
! 1150: if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
! 1151: return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
! 1152: + tlen + SIZE_OP_SET_OPTION;
! 1153: }
! 1154: else
! 1155: return tlen;
! 1156: }
! 1157:
! 1158: static int
! 1159: compile_option_node(EffectNode* node, regex_t* reg)
! 1160: {
! 1161: int r;
! 1162: OnigOptionType prev = reg->options;
! 1163:
! 1164: if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
! 1165: r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
! 1166: if (r) return r;
! 1167: r = add_opcode_option(reg, OP_SET_OPTION, prev);
! 1168: if (r) return r;
! 1169: r = add_opcode(reg, OP_FAIL);
! 1170: if (r) return r;
! 1171: }
! 1172:
! 1173: reg->options = node->option;
! 1174: r = compile_tree(node->target, reg);
! 1175: reg->options = prev;
! 1176:
! 1177: if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
! 1178: if (r) return r;
! 1179: r = add_opcode_option(reg, OP_SET_OPTION, prev);
! 1180: }
! 1181: return r;
! 1182: }
! 1183:
! 1184: static int
! 1185: compile_length_effect_node(EffectNode* node, regex_t* reg)
! 1186: {
! 1187: int len;
! 1188: int tlen;
! 1189:
! 1190: if (node->type == EFFECT_OPTION)
! 1191: return compile_length_option_node(node, reg);
! 1192:
! 1193: if (node->target) {
! 1194: tlen = compile_length_tree(node->target, reg);
! 1195: if (tlen < 0) return tlen;
! 1196: }
! 1197: else
! 1198: tlen = 0;
! 1199:
! 1200: switch (node->type) {
! 1201: case EFFECT_MEMORY:
! 1202: #ifdef USE_SUBEXP_CALL
! 1203: if (IS_EFFECT_CALLED(node)) {
! 1204: len = SIZE_OP_MEMORY_START_PUSH + tlen
! 1205: + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
! 1206: if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
! 1207: len += (IS_EFFECT_RECURSION(node)
! 1208: ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
! 1209: else
! 1210: len += (IS_EFFECT_RECURSION(node)
! 1211: ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
! 1212: }
! 1213: else
! 1214: #endif
! 1215: {
! 1216: if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
! 1217: len = SIZE_OP_MEMORY_START_PUSH;
! 1218: else
! 1219: len = SIZE_OP_MEMORY_START;
! 1220:
! 1221: len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
! 1222: ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
! 1223: }
! 1224: break;
! 1225:
! 1226: case EFFECT_STOP_BACKTRACK:
! 1227: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
! 1228: QuantifierNode* qn = &NQUANTIFIER(node->target);
! 1229: tlen = compile_length_tree(qn->target, reg);
! 1230: if (tlen < 0) return tlen;
! 1231:
! 1232: len = tlen * qn->lower
! 1233: + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
! 1234: }
! 1235: else {
! 1236: len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
! 1237: }
! 1238: break;
! 1239:
! 1240: default:
! 1241: return ONIGERR_TYPE_BUG;
! 1242: break;
! 1243: }
! 1244:
! 1245: return len;
! 1246: }
! 1247:
! 1248: static int get_char_length_tree(Node* node, regex_t* reg, int* len);
! 1249:
! 1250: static int
! 1251: compile_effect_node(EffectNode* node, regex_t* reg)
! 1252: {
! 1253: int r, len;
! 1254:
! 1255: if (node->type == EFFECT_OPTION)
! 1256: return compile_option_node(node, reg);
! 1257:
! 1258: switch (node->type) {
! 1259: case EFFECT_MEMORY:
! 1260: #ifdef USE_SUBEXP_CALL
! 1261: if (IS_EFFECT_CALLED(node)) {
! 1262: r = add_opcode(reg, OP_CALL);
! 1263: if (r) return r;
! 1264: node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
! 1265: node->state |= NST_ADDR_FIXED;
! 1266: r = add_abs_addr(reg, (int )node->call_addr);
! 1267: if (r) return r;
! 1268: len = compile_length_tree(node->target, reg);
! 1269: len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
! 1270: if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
! 1271: len += (IS_EFFECT_RECURSION(node)
! 1272: ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
! 1273: else
! 1274: len += (IS_EFFECT_RECURSION(node)
! 1275: ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
! 1276:
! 1277: r = add_opcode_rel_addr(reg, OP_JUMP, len);
! 1278: if (r) return r;
! 1279: }
! 1280: #endif
! 1281: if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
! 1282: r = add_opcode(reg, OP_MEMORY_START_PUSH);
! 1283: else
! 1284: r = add_opcode(reg, OP_MEMORY_START);
! 1285: if (r) return r;
! 1286: r = add_mem_num(reg, node->regnum);
! 1287: if (r) return r;
! 1288: r = compile_tree(node->target, reg);
! 1289: if (r) return r;
! 1290: #ifdef USE_SUBEXP_CALL
! 1291: if (IS_EFFECT_CALLED(node)) {
! 1292: if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
! 1293: r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
! 1294: ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
! 1295: else
! 1296: r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
! 1297: ? OP_MEMORY_END_REC : OP_MEMORY_END));
! 1298:
! 1299: if (r) return r;
! 1300: r = add_mem_num(reg, node->regnum);
! 1301: if (r) return r;
! 1302: r = add_opcode(reg, OP_RETURN);
! 1303: }
! 1304: else
! 1305: #endif
! 1306: {
! 1307: if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
! 1308: r = add_opcode(reg, OP_MEMORY_END_PUSH);
! 1309: else
! 1310: r = add_opcode(reg, OP_MEMORY_END);
! 1311: if (r) return r;
! 1312: r = add_mem_num(reg, node->regnum);
! 1313: }
! 1314: break;
! 1315:
! 1316: case EFFECT_STOP_BACKTRACK:
! 1317: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
! 1318: QuantifierNode* qn = &NQUANTIFIER(node->target);
! 1319: r = compile_tree_n_times(qn->target, qn->lower, reg);
! 1320: if (r) return r;
! 1321:
! 1322: len = compile_length_tree(qn->target, reg);
! 1323: if (len < 0) return len;
! 1324:
! 1325: r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
! 1326: if (r) return r;
! 1327: r = compile_tree(qn->target, reg);
! 1328: if (r) return r;
! 1329: r = add_opcode(reg, OP_POP);
! 1330: if (r) return r;
! 1331: r = add_opcode_rel_addr(reg, OP_JUMP,
! 1332: -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
! 1333: }
! 1334: else {
! 1335: r = add_opcode(reg, OP_PUSH_STOP_BT);
! 1336: if (r) return r;
! 1337: r = compile_tree(node->target, reg);
! 1338: if (r) return r;
! 1339: r = add_opcode(reg, OP_POP_STOP_BT);
! 1340: }
! 1341: break;
! 1342:
! 1343: default:
! 1344: return ONIGERR_TYPE_BUG;
! 1345: break;
! 1346: }
! 1347:
! 1348: return r;
! 1349: }
! 1350:
! 1351: static int
! 1352: compile_length_anchor_node(AnchorNode* node, regex_t* reg)
! 1353: {
! 1354: int len;
! 1355: int tlen = 0;
! 1356:
! 1357: if (node->target) {
! 1358: tlen = compile_length_tree(node->target, reg);
! 1359: if (tlen < 0) return tlen;
! 1360: }
! 1361:
! 1362: switch (node->type) {
! 1363: case ANCHOR_PREC_READ:
! 1364: len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
! 1365: break;
! 1366: case ANCHOR_PREC_READ_NOT:
! 1367: len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
! 1368: break;
! 1369: case ANCHOR_LOOK_BEHIND:
! 1370: len = SIZE_OP_LOOK_BEHIND + tlen;
! 1371: break;
! 1372: case ANCHOR_LOOK_BEHIND_NOT:
! 1373: len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
! 1374: break;
! 1375:
! 1376: default:
! 1377: len = SIZE_OPCODE;
! 1378: break;
! 1379: }
! 1380:
! 1381: return len;
! 1382: }
! 1383:
! 1384: static int
! 1385: compile_anchor_node(AnchorNode* node, regex_t* reg)
! 1386: {
! 1387: int r, len;
! 1388:
! 1389: switch (node->type) {
! 1390: case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
! 1391: case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
! 1392: case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
! 1393: case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
! 1394: case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
! 1395: case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
! 1396:
! 1397: case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
! 1398: case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
! 1399: #ifdef USE_WORD_BEGIN_END
! 1400: case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
! 1401: case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
! 1402: #endif
! 1403:
! 1404: case ANCHOR_PREC_READ:
! 1405: r = add_opcode(reg, OP_PUSH_POS);
! 1406: if (r) return r;
! 1407: r = compile_tree(node->target, reg);
! 1408: if (r) return r;
! 1409: r = add_opcode(reg, OP_POP_POS);
! 1410: break;
! 1411:
! 1412: case ANCHOR_PREC_READ_NOT:
! 1413: len = compile_length_tree(node->target, reg);
! 1414: if (len < 0) return len;
! 1415: r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
! 1416: if (r) return r;
! 1417: r = compile_tree(node->target, reg);
! 1418: if (r) return r;
! 1419: r = add_opcode(reg, OP_FAIL_POS);
! 1420: break;
! 1421:
! 1422: case ANCHOR_LOOK_BEHIND:
! 1423: {
! 1424: int n;
! 1425: r = add_opcode(reg, OP_LOOK_BEHIND);
! 1426: if (r) return r;
! 1427: if (node->char_len < 0) {
! 1428: r = get_char_length_tree(node->target, reg, &n);
! 1429: if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
! 1430: }
! 1431: else
! 1432: n = node->char_len;
! 1433: r = add_length(reg, n);
! 1434: if (r) return r;
! 1435: r = compile_tree(node->target, reg);
! 1436: }
! 1437: break;
! 1438:
! 1439: case ANCHOR_LOOK_BEHIND_NOT:
! 1440: {
! 1441: int n;
! 1442: len = compile_length_tree(node->target, reg);
! 1443: r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
! 1444: len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
! 1445: if (r) return r;
! 1446: if (node->char_len < 0) {
! 1447: r = get_char_length_tree(node->target, reg, &n);
! 1448: if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
! 1449: }
! 1450: else
! 1451: n = node->char_len;
! 1452: r = add_length(reg, n);
! 1453: if (r) return r;
! 1454: r = compile_tree(node->target, reg);
! 1455: if (r) return r;
! 1456: r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
! 1457: }
! 1458: break;
! 1459:
! 1460: default:
! 1461: return ONIGERR_TYPE_BUG;
! 1462: break;
! 1463: }
! 1464:
! 1465: return r;
! 1466: }
! 1467:
! 1468: static int
! 1469: compile_length_tree(Node* node, regex_t* reg)
! 1470: {
! 1471: int len, type, r;
! 1472:
! 1473: type = NTYPE(node);
! 1474: switch (type) {
! 1475: case N_LIST:
! 1476: len = 0;
! 1477: do {
! 1478: r = compile_length_tree(NCONS(node).left, reg);
! 1479: if (r < 0) return r;
! 1480: len += r;
! 1481: } while (IS_NOT_NULL(node = NCONS(node).right));
! 1482: r = len;
! 1483: break;
! 1484:
! 1485: case N_ALT:
! 1486: {
! 1487: int n;
! 1488:
! 1489: n = r = 0;
! 1490: do {
! 1491: r += compile_length_tree(NCONS(node).left, reg);
! 1492: n++;
! 1493: } while (IS_NOT_NULL(node = NCONS(node).right));
! 1494: r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
! 1495: }
! 1496: break;
! 1497:
! 1498: case N_STRING:
! 1499: if (NSTRING_IS_RAW(node))
! 1500: r = compile_length_string_raw_node(&(NSTRING(node)), reg);
! 1501: else
! 1502: r = compile_length_string_node(node, reg);
! 1503: break;
! 1504:
! 1505: case N_CCLASS:
! 1506: r = compile_length_cclass_node(&(NCCLASS(node)), reg);
! 1507: break;
! 1508:
! 1509: case N_CTYPE:
! 1510: case N_ANYCHAR:
! 1511: r = SIZE_OPCODE;
! 1512: break;
! 1513:
! 1514: case N_BACKREF:
! 1515: {
! 1516: BackrefNode* br = &(NBACKREF(node));
! 1517:
! 1518: #ifdef USE_BACKREF_AT_LEVEL
! 1519: if (IS_BACKREF_NEST_LEVEL(br)) {
! 1520: r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
! 1521: SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
! 1522: }
! 1523: else
! 1524: #endif
! 1525: if (br->back_num == 1) {
! 1526: r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
! 1527: ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
! 1528: }
! 1529: else {
! 1530: r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
! 1531: }
! 1532: }
! 1533: break;
! 1534:
! 1535: #ifdef USE_SUBEXP_CALL
! 1536: case N_CALL:
! 1537: r = SIZE_OP_CALL;
! 1538: break;
! 1539: #endif
! 1540:
! 1541: case N_QUANTIFIER:
! 1542: r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
! 1543: break;
! 1544:
! 1545: case N_EFFECT:
! 1546: r = compile_length_effect_node(&NEFFECT(node), reg);
! 1547: break;
! 1548:
! 1549: case N_ANCHOR:
! 1550: r = compile_length_anchor_node(&(NANCHOR(node)), reg);
! 1551: break;
! 1552:
! 1553: default:
! 1554: return ONIGERR_TYPE_BUG;
! 1555: break;
! 1556: }
! 1557:
! 1558: return r;
! 1559: }
! 1560:
! 1561: static int
! 1562: compile_tree(Node* node, regex_t* reg)
! 1563: {
! 1564: int n, type, len, pos, r = 0;
! 1565:
! 1566: type = NTYPE(node);
! 1567: switch (type) {
! 1568: case N_LIST:
! 1569: do {
! 1570: r = compile_tree(NCONS(node).left, reg);
! 1571: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 1572: break;
! 1573:
! 1574: case N_ALT:
! 1575: {
! 1576: Node* x = node;
! 1577: len = 0;
! 1578: do {
! 1579: len += compile_length_tree(NCONS(x).left, reg);
! 1580: if (NCONS(x).right != NULL) {
! 1581: len += SIZE_OP_PUSH + SIZE_OP_JUMP;
! 1582: }
! 1583: } while (IS_NOT_NULL(x = NCONS(x).right));
! 1584: pos = reg->used + len; /* goal position */
! 1585:
! 1586: do {
! 1587: len = compile_length_tree(NCONS(node).left, reg);
! 1588: if (IS_NOT_NULL(NCONS(node).right)) {
! 1589: r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
! 1590: if (r) break;
! 1591: }
! 1592: r = compile_tree(NCONS(node).left, reg);
! 1593: if (r) break;
! 1594: if (IS_NOT_NULL(NCONS(node).right)) {
! 1595: len = pos - (reg->used + SIZE_OP_JUMP);
! 1596: r = add_opcode_rel_addr(reg, OP_JUMP, len);
! 1597: if (r) break;
! 1598: }
! 1599: } while (IS_NOT_NULL(node = NCONS(node).right));
! 1600: }
! 1601: break;
! 1602:
! 1603: case N_STRING:
! 1604: if (NSTRING_IS_RAW(node))
! 1605: r = compile_string_raw_node(&(NSTRING(node)), reg);
! 1606: else
! 1607: r = compile_string_node(node, reg);
! 1608: break;
! 1609:
! 1610: case N_CCLASS:
! 1611: r = compile_cclass_node(&(NCCLASS(node)), reg);
! 1612: break;
! 1613:
! 1614: case N_CTYPE:
! 1615: {
! 1616: int op;
! 1617:
! 1618: switch (NCTYPE(node).type) {
! 1619: case CTYPE_WORD: op = OP_WORD; break;
! 1620: case CTYPE_NOT_WORD: op = OP_NOT_WORD; break;
! 1621: default:
! 1622: return ONIGERR_TYPE_BUG;
! 1623: break;
! 1624: }
! 1625: r = add_opcode(reg, op);
! 1626: }
! 1627: break;
! 1628:
! 1629: case N_ANYCHAR:
! 1630: if (IS_MULTILINE(reg->options))
! 1631: r = add_opcode(reg, OP_ANYCHAR_ML);
! 1632: else
! 1633: r = add_opcode(reg, OP_ANYCHAR);
! 1634: break;
! 1635:
! 1636: case N_BACKREF:
! 1637: {
! 1638: BackrefNode* br = &(NBACKREF(node));
! 1639:
! 1640: #ifdef USE_BACKREF_AT_LEVEL
! 1641: if (IS_BACKREF_NEST_LEVEL(br)) {
! 1642: r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
! 1643: if (r) return r;
! 1644: r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
! 1645: if (r) return r;
! 1646: r = add_length(reg, br->nest_level);
! 1647: if (r) return r;
! 1648:
! 1649: goto add_bacref_mems;
! 1650: }
! 1651: else
! 1652: #endif
! 1653: if (br->back_num == 1) {
! 1654: n = br->back_static[0];
! 1655: if (IS_IGNORECASE(reg->options)) {
! 1656: r = add_opcode(reg, OP_BACKREFN_IC);
! 1657: if (r) return r;
! 1658: r = add_mem_num(reg, n);
! 1659: }
! 1660: else {
! 1661: switch (n) {
! 1662: case 1: r = add_opcode(reg, OP_BACKREF1); break;
! 1663: case 2: r = add_opcode(reg, OP_BACKREF2); break;
! 1664: default:
! 1665: r = add_opcode(reg, OP_BACKREFN);
! 1666: if (r) return r;
! 1667: r = add_mem_num(reg, n);
! 1668: break;
! 1669: }
! 1670: }
! 1671: }
! 1672: else {
! 1673: int i;
! 1674: int* p;
! 1675:
! 1676: if (IS_IGNORECASE(reg->options)) {
! 1677: r = add_opcode(reg, OP_BACKREF_MULTI_IC);
! 1678: }
! 1679: else {
! 1680: r = add_opcode(reg, OP_BACKREF_MULTI);
! 1681: }
! 1682: if (r) return r;
! 1683:
! 1684: #ifdef USE_BACKREF_AT_LEVEL
! 1685: add_bacref_mems:
! 1686: #endif
! 1687: r = add_length(reg, br->back_num);
! 1688: if (r) return r;
! 1689: p = BACKREFS_P(br);
! 1690: for (i = br->back_num - 1; i >= 0; i--) {
! 1691: r = add_mem_num(reg, p[i]);
! 1692: if (r) return r;
! 1693: }
! 1694: }
! 1695: }
! 1696: break;
! 1697:
! 1698: #ifdef USE_SUBEXP_CALL
! 1699: case N_CALL:
! 1700: r = compile_call(&(NCALL(node)), reg);
! 1701: break;
! 1702: #endif
! 1703:
! 1704: case N_QUANTIFIER:
! 1705: r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
! 1706: break;
! 1707:
! 1708: case N_EFFECT:
! 1709: r = compile_effect_node(&NEFFECT(node), reg);
! 1710: break;
! 1711:
! 1712: case N_ANCHOR:
! 1713: r = compile_anchor_node(&(NANCHOR(node)), reg);
! 1714: break;
! 1715:
! 1716: default:
! 1717: #ifdef ONIG_DEBUG
! 1718: fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
! 1719: #endif
! 1720: break;
! 1721: }
! 1722:
! 1723: return r;
! 1724: }
! 1725:
! 1726: #ifdef USE_NAMED_GROUP
! 1727:
! 1728: static int
! 1729: noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
! 1730: {
! 1731: int r = 0;
! 1732: Node* node = *plink;
! 1733:
! 1734: switch (NTYPE(node)) {
! 1735: case N_LIST:
! 1736: case N_ALT:
! 1737: do {
! 1738: r = noname_disable_map(&(NCONS(node).left), map, counter);
! 1739: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 1740: break;
! 1741:
! 1742: case N_QUANTIFIER:
! 1743: {
! 1744: Node** ptarget = &(NQUANTIFIER(node).target);
! 1745: Node* old = *ptarget;
! 1746: r = noname_disable_map(ptarget, map, counter);
! 1747: if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
! 1748: onig_reduce_nested_quantifier(node, *ptarget);
! 1749: }
! 1750: }
! 1751: break;
! 1752:
! 1753: case N_EFFECT:
! 1754: {
! 1755: EffectNode* en = &(NEFFECT(node));
! 1756: if (en->type == EFFECT_MEMORY) {
! 1757: if (IS_EFFECT_NAMED_GROUP(en)) {
! 1758: (*counter)++;
! 1759: map[en->regnum].new_val = *counter;
! 1760: en->regnum = *counter;
! 1761: r = noname_disable_map(&(en->target), map, counter);
! 1762: }
! 1763: else {
! 1764: *plink = en->target;
! 1765: en->target = NULL_NODE;
! 1766: onig_node_free(node);
! 1767: r = noname_disable_map(plink, map, counter);
! 1768: }
! 1769: }
! 1770: else
! 1771: r = noname_disable_map(&(en->target), map, counter);
! 1772: }
! 1773: break;
! 1774:
! 1775: default:
! 1776: break;
! 1777: }
! 1778:
! 1779: return r;
! 1780: }
! 1781:
! 1782: static int
! 1783: renumber_node_backref(Node* node, GroupNumRemap* map)
! 1784: {
! 1785: int i, pos, n, old_num;
! 1786: int *backs;
! 1787: BackrefNode* bn = &(NBACKREF(node));
! 1788:
! 1789: if (! IS_BACKREF_NAME_REF(bn))
! 1790: return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
! 1791:
! 1792: old_num = bn->back_num;
! 1793: if (IS_NULL(bn->back_dynamic))
! 1794: backs = bn->back_static;
! 1795: else
! 1796: backs = bn->back_dynamic;
! 1797:
! 1798: for (i = 0, pos = 0; i < old_num; i++) {
! 1799: n = map[backs[i]].new_val;
! 1800: if (n > 0) {
! 1801: backs[pos] = n;
! 1802: pos++;
! 1803: }
! 1804: }
! 1805:
! 1806: bn->back_num = pos;
! 1807: return 0;
! 1808: }
! 1809:
! 1810: static int
! 1811: renumber_by_map(Node* node, GroupNumRemap* map)
! 1812: {
! 1813: int r = 0;
! 1814:
! 1815: switch (NTYPE(node)) {
! 1816: case N_LIST:
! 1817: case N_ALT:
! 1818: do {
! 1819: r = renumber_by_map(NCONS(node).left, map);
! 1820: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 1821: break;
! 1822: case N_QUANTIFIER:
! 1823: r = renumber_by_map(NQUANTIFIER(node).target, map);
! 1824: break;
! 1825: case N_EFFECT:
! 1826: r = renumber_by_map(NEFFECT(node).target, map);
! 1827: break;
! 1828:
! 1829: case N_BACKREF:
! 1830: r = renumber_node_backref(node, map);
! 1831: break;
! 1832:
! 1833: default:
! 1834: break;
! 1835: }
! 1836:
! 1837: return r;
! 1838: }
! 1839:
! 1840: static int
! 1841: numbered_ref_check(Node* node)
! 1842: {
! 1843: int r = 0;
! 1844:
! 1845: switch (NTYPE(node)) {
! 1846: case N_LIST:
! 1847: case N_ALT:
! 1848: do {
! 1849: r = numbered_ref_check(NCONS(node).left);
! 1850: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 1851: break;
! 1852: case N_QUANTIFIER:
! 1853: r = numbered_ref_check(NQUANTIFIER(node).target);
! 1854: break;
! 1855: case N_EFFECT:
! 1856: r = numbered_ref_check(NEFFECT(node).target);
! 1857: break;
! 1858:
! 1859: case N_BACKREF:
! 1860: if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
! 1861: return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
! 1862: break;
! 1863:
! 1864: default:
! 1865: break;
! 1866: }
! 1867:
! 1868: return r;
! 1869: }
! 1870:
! 1871: static int
! 1872: disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
! 1873: {
! 1874: int r, i, pos, counter;
! 1875: BitStatusType loc;
! 1876: GroupNumRemap* map;
! 1877:
! 1878: map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
! 1879: CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
! 1880: for (i = 1; i <= env->num_mem; i++) {
! 1881: map[i].new_val = 0;
! 1882: }
! 1883: counter = 0;
! 1884: r = noname_disable_map(root, map, &counter);
! 1885: if (r != 0) return r;
! 1886:
! 1887: r = renumber_by_map(*root, map);
! 1888: if (r != 0) return r;
! 1889:
! 1890: for (i = 1, pos = 1; i <= env->num_mem; i++) {
! 1891: if (map[i].new_val > 0) {
! 1892: SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
! 1893: pos++;
! 1894: }
! 1895: }
! 1896:
! 1897: loc = env->capture_history;
! 1898: BIT_STATUS_CLEAR(env->capture_history);
! 1899: for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
! 1900: if (BIT_STATUS_AT(loc, i)) {
! 1901: BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
! 1902: }
! 1903: }
! 1904:
! 1905: env->num_mem = env->num_named;
! 1906: reg->num_mem = env->num_named;
! 1907:
! 1908: return onig_renumber_name_table(reg, map);
! 1909: }
! 1910: #endif /* USE_NAMED_GROUP */
! 1911:
! 1912: #ifdef USE_SUBEXP_CALL
! 1913: static int
! 1914: unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
! 1915: {
! 1916: int i, offset;
! 1917: EffectNode* en;
! 1918: AbsAddrType addr;
! 1919:
! 1920: for (i = 0; i < uslist->num; i++) {
! 1921: en = &(NEFFECT(uslist->us[i].target));
! 1922: if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
! 1923: addr = en->call_addr;
! 1924: offset = uslist->us[i].offset;
! 1925:
! 1926: BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
! 1927: }
! 1928: return 0;
! 1929: }
! 1930: #endif
! 1931:
! 1932: #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
! 1933: static int
! 1934: quantifiers_memory_node_info(Node* node)
! 1935: {
! 1936: int r = 0;
! 1937:
! 1938: switch (NTYPE(node)) {
! 1939: case N_LIST:
! 1940: case N_ALT:
! 1941: {
! 1942: int v;
! 1943: do {
! 1944: v = quantifiers_memory_node_info(NCONS(node).left);
! 1945: if (v > r) r = v;
! 1946: } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
! 1947: }
! 1948: break;
! 1949:
! 1950: #ifdef USE_SUBEXP_CALL
! 1951: case N_CALL:
! 1952: if (IS_CALL_RECURSION(&NCALL(node))) {
! 1953: return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
! 1954: }
! 1955: else
! 1956: r = quantifiers_memory_node_info(NCALL(node).target);
! 1957: break;
! 1958: #endif
! 1959:
! 1960: case N_QUANTIFIER:
! 1961: {
! 1962: QuantifierNode* qn = &(NQUANTIFIER(node));
! 1963: if (qn->upper != 0) {
! 1964: r = quantifiers_memory_node_info(qn->target);
! 1965: }
! 1966: }
! 1967: break;
! 1968:
! 1969: case N_EFFECT:
! 1970: {
! 1971: EffectNode* en = &(NEFFECT(node));
! 1972: switch (en->type) {
! 1973: case EFFECT_MEMORY:
! 1974: return NQ_TARGET_IS_EMPTY_MEM;
! 1975: break;
! 1976:
! 1977: case EFFECT_OPTION:
! 1978: case EFFECT_STOP_BACKTRACK:
! 1979: r = quantifiers_memory_node_info(en->target);
! 1980: break;
! 1981: default:
! 1982: break;
! 1983: }
! 1984: }
! 1985: break;
! 1986:
! 1987: case N_BACKREF:
! 1988: case N_STRING:
! 1989: case N_CTYPE:
! 1990: case N_CCLASS:
! 1991: case N_ANYCHAR:
! 1992: case N_ANCHOR:
! 1993: default:
! 1994: break;
! 1995: }
! 1996:
! 1997: return r;
! 1998: }
! 1999: #endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
! 2000:
! 2001: static int
! 2002: get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
! 2003: {
! 2004: OnigDistance tmin;
! 2005: int r = 0;
! 2006:
! 2007: *min = 0;
! 2008: switch (NTYPE(node)) {
! 2009: case N_BACKREF:
! 2010: {
! 2011: int i;
! 2012: int* backs;
! 2013: Node** nodes = SCANENV_MEM_NODES(env);
! 2014: BackrefNode* br = &(NBACKREF(node));
! 2015: if (br->state & NST_RECURSION) break;
! 2016:
! 2017: backs = BACKREFS_P(br);
! 2018: if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
! 2019: r = get_min_match_length(nodes[backs[0]], min, env);
! 2020: if (r != 0) break;
! 2021: for (i = 1; i < br->back_num; i++) {
! 2022: if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
! 2023: r = get_min_match_length(nodes[backs[i]], &tmin, env);
! 2024: if (r != 0) break;
! 2025: if (*min > tmin) *min = tmin;
! 2026: }
! 2027: }
! 2028: break;
! 2029:
! 2030: #ifdef USE_SUBEXP_CALL
! 2031: case N_CALL:
! 2032: if (IS_CALL_RECURSION(&NCALL(node))) {
! 2033: EffectNode* en = &(NEFFECT(NCALL(node).target));
! 2034: if (IS_EFFECT_MIN_FIXED(en))
! 2035: *min = en->min_len;
! 2036: }
! 2037: else
! 2038: r = get_min_match_length(NCALL(node).target, min, env);
! 2039: break;
! 2040: #endif
! 2041:
! 2042: case N_LIST:
! 2043: do {
! 2044: r = get_min_match_length(NCONS(node).left, &tmin, env);
! 2045: if (r == 0) *min += tmin;
! 2046: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 2047: break;
! 2048:
! 2049: case N_ALT:
! 2050: {
! 2051: Node *x, *y;
! 2052: y = node;
! 2053: do {
! 2054: x = NCONS(y).left;
! 2055: r = get_min_match_length(x, &tmin, env);
! 2056: if (r != 0) break;
! 2057: if (y == node) *min = tmin;
! 2058: else if (*min > tmin) *min = tmin;
! 2059: } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
! 2060: }
! 2061: break;
! 2062:
! 2063: case N_STRING:
! 2064: {
! 2065: StrNode* sn = &(NSTRING(node));
! 2066: *min = sn->end - sn->s;
! 2067: }
! 2068: break;
! 2069:
! 2070: case N_CTYPE:
! 2071: switch (NCTYPE(node).type) {
! 2072: case CTYPE_WORD: *min = 1; break;
! 2073: case CTYPE_NOT_WORD: *min = 1; break;
! 2074: default:
! 2075: break;
! 2076: }
! 2077: break;
! 2078:
! 2079: case N_CCLASS:
! 2080: case N_ANYCHAR:
! 2081: *min = 1;
! 2082: break;
! 2083:
! 2084: case N_QUANTIFIER:
! 2085: {
! 2086: QuantifierNode* qn = &(NQUANTIFIER(node));
! 2087:
! 2088: if (qn->lower > 0) {
! 2089: r = get_min_match_length(qn->target, min, env);
! 2090: if (r == 0)
! 2091: *min = distance_multiply(*min, qn->lower);
! 2092: }
! 2093: }
! 2094: break;
! 2095:
! 2096: case N_EFFECT:
! 2097: {
! 2098: EffectNode* en = &(NEFFECT(node));
! 2099: switch (en->type) {
! 2100: case EFFECT_MEMORY:
! 2101: #ifdef USE_SUBEXP_CALL
! 2102: if (IS_EFFECT_MIN_FIXED(en))
! 2103: *min = en->min_len;
! 2104: else {
! 2105: r = get_min_match_length(en->target, min, env);
! 2106: if (r == 0) {
! 2107: en->min_len = *min;
! 2108: SET_EFFECT_STATUS(node, NST_MIN_FIXED);
! 2109: }
! 2110: }
! 2111: break;
! 2112: #endif
! 2113: case EFFECT_OPTION:
! 2114: case EFFECT_STOP_BACKTRACK:
! 2115: r = get_min_match_length(en->target, min, env);
! 2116: break;
! 2117: }
! 2118: }
! 2119: break;
! 2120:
! 2121: case N_ANCHOR:
! 2122: default:
! 2123: break;
! 2124: }
! 2125:
! 2126: return r;
! 2127: }
! 2128:
! 2129: static int
! 2130: get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
! 2131: {
! 2132: OnigDistance tmax;
! 2133: int r = 0;
! 2134:
! 2135: *max = 0;
! 2136: switch (NTYPE(node)) {
! 2137: case N_LIST:
! 2138: do {
! 2139: r = get_max_match_length(NCONS(node).left, &tmax, env);
! 2140: if (r == 0)
! 2141: *max = distance_add(*max, tmax);
! 2142: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 2143: break;
! 2144:
! 2145: case N_ALT:
! 2146: do {
! 2147: r = get_max_match_length(NCONS(node).left, &tmax, env);
! 2148: if (r == 0 && *max < tmax) *max = tmax;
! 2149: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 2150: break;
! 2151:
! 2152: case N_STRING:
! 2153: {
! 2154: StrNode* sn = &(NSTRING(node));
! 2155: *max = sn->end - sn->s;
! 2156: }
! 2157: break;
! 2158:
! 2159: case N_CTYPE:
! 2160: switch (NCTYPE(node).type) {
! 2161: case CTYPE_WORD:
! 2162: case CTYPE_NOT_WORD:
! 2163: *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
! 2164: break;
! 2165:
! 2166: default:
! 2167: break;
! 2168: }
! 2169: break;
! 2170:
! 2171: case N_CCLASS:
! 2172: case N_ANYCHAR:
! 2173: *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
! 2174: break;
! 2175:
! 2176: case N_BACKREF:
! 2177: {
! 2178: int i;
! 2179: int* backs;
! 2180: Node** nodes = SCANENV_MEM_NODES(env);
! 2181: BackrefNode* br = &(NBACKREF(node));
! 2182: if (br->state & NST_RECURSION) {
! 2183: *max = ONIG_INFINITE_DISTANCE;
! 2184: break;
! 2185: }
! 2186: backs = BACKREFS_P(br);
! 2187: for (i = 0; i < br->back_num; i++) {
! 2188: if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
! 2189: r = get_max_match_length(nodes[backs[i]], &tmax, env);
! 2190: if (r != 0) break;
! 2191: if (*max < tmax) *max = tmax;
! 2192: }
! 2193: }
! 2194: break;
! 2195:
! 2196: #ifdef USE_SUBEXP_CALL
! 2197: case N_CALL:
! 2198: if (! IS_CALL_RECURSION(&(NCALL(node))))
! 2199: r = get_max_match_length(NCALL(node).target, max, env);
! 2200: else
! 2201: *max = ONIG_INFINITE_DISTANCE;
! 2202: break;
! 2203: #endif
! 2204:
! 2205: case N_QUANTIFIER:
! 2206: {
! 2207: QuantifierNode* qn = &(NQUANTIFIER(node));
! 2208:
! 2209: if (qn->upper != 0) {
! 2210: r = get_max_match_length(qn->target, max, env);
! 2211: if (r == 0 && *max != 0) {
! 2212: if (! IS_REPEAT_INFINITE(qn->upper))
! 2213: *max = distance_multiply(*max, qn->upper);
! 2214: else
! 2215: *max = ONIG_INFINITE_DISTANCE;
! 2216: }
! 2217: }
! 2218: }
! 2219: break;
! 2220:
! 2221: case N_EFFECT:
! 2222: {
! 2223: EffectNode* en = &(NEFFECT(node));
! 2224: switch (en->type) {
! 2225: case EFFECT_MEMORY:
! 2226: #ifdef USE_SUBEXP_CALL
! 2227: if (IS_EFFECT_MAX_FIXED(en))
! 2228: *max = en->max_len;
! 2229: else {
! 2230: r = get_max_match_length(en->target, max, env);
! 2231: if (r == 0) {
! 2232: en->max_len = *max;
! 2233: SET_EFFECT_STATUS(node, NST_MAX_FIXED);
! 2234: }
! 2235: }
! 2236: break;
! 2237: #endif
! 2238: case EFFECT_OPTION:
! 2239: case EFFECT_STOP_BACKTRACK:
! 2240: r = get_max_match_length(en->target, max, env);
! 2241: break;
! 2242: }
! 2243: }
! 2244: break;
! 2245:
! 2246: case N_ANCHOR:
! 2247: default:
! 2248: break;
! 2249: }
! 2250:
! 2251: return r;
! 2252: }
! 2253:
! 2254: #define GET_CHAR_LEN_VARLEN -1
! 2255: #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
! 2256:
! 2257: /* fixed size pattern node only */
! 2258: static int
! 2259: get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
! 2260: {
! 2261: int tlen;
! 2262: int r = 0;
! 2263:
! 2264: level++;
! 2265: *len = 0;
! 2266: switch (NTYPE(node)) {
! 2267: case N_LIST:
! 2268: do {
! 2269: r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
! 2270: if (r == 0)
! 2271: *len = distance_add(*len, tlen);
! 2272: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 2273: break;
! 2274:
! 2275: case N_ALT:
! 2276: {
! 2277: int tlen2;
! 2278: int varlen = 0;
! 2279:
! 2280: r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
! 2281: while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
! 2282: r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
! 2283: if (r == 0) {
! 2284: if (tlen != tlen2)
! 2285: varlen = 1;
! 2286: }
! 2287: }
! 2288: if (r == 0) {
! 2289: if (varlen != 0) {
! 2290: if (level == 1)
! 2291: r = GET_CHAR_LEN_TOP_ALT_VARLEN;
! 2292: else
! 2293: r = GET_CHAR_LEN_VARLEN;
! 2294: }
! 2295: else
! 2296: *len = tlen;
! 2297: }
! 2298: }
! 2299: break;
! 2300:
! 2301: case N_STRING:
! 2302: {
! 2303: StrNode* sn = &(NSTRING(node));
! 2304: UChar *s = sn->s;
! 2305: while (s < sn->end) {
! 2306: s += enc_len(reg->enc, s);
! 2307: (*len)++;
! 2308: }
! 2309: }
! 2310: break;
! 2311:
! 2312: case N_QUANTIFIER:
! 2313: {
! 2314: QuantifierNode* qn = &(NQUANTIFIER(node));
! 2315: if (qn->lower == qn->upper) {
! 2316: r = get_char_length_tree1(qn->target, reg, &tlen, level);
! 2317: if (r == 0)
! 2318: *len = distance_multiply(tlen, qn->lower);
! 2319: }
! 2320: else
! 2321: r = GET_CHAR_LEN_VARLEN;
! 2322: }
! 2323: break;
! 2324:
! 2325: #ifdef USE_SUBEXP_CALL
! 2326: case N_CALL:
! 2327: if (! IS_CALL_RECURSION(&(NCALL(node))))
! 2328: r = get_char_length_tree1(NCALL(node).target, reg, len, level);
! 2329: else
! 2330: r = GET_CHAR_LEN_VARLEN;
! 2331: break;
! 2332: #endif
! 2333:
! 2334: case N_CTYPE:
! 2335: switch (NCTYPE(node).type) {
! 2336: case CTYPE_WORD:
! 2337: case CTYPE_NOT_WORD:
! 2338: *len = 1;
! 2339: break;
! 2340: }
! 2341: break;
! 2342:
! 2343: case N_CCLASS:
! 2344: case N_ANYCHAR:
! 2345: *len = 1;
! 2346: break;
! 2347:
! 2348: case N_EFFECT:
! 2349: {
! 2350: EffectNode* en = &(NEFFECT(node));
! 2351: switch (en->type) {
! 2352: case EFFECT_MEMORY:
! 2353: #ifdef USE_SUBEXP_CALL
! 2354: if (IS_EFFECT_CLEN_FIXED(en))
! 2355: *len = en->char_len;
! 2356: else {
! 2357: r = get_char_length_tree1(en->target, reg, len, level);
! 2358: if (r == 0) {
! 2359: en->char_len = *len;
! 2360: SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
! 2361: }
! 2362: }
! 2363: break;
! 2364: #endif
! 2365: case EFFECT_OPTION:
! 2366: case EFFECT_STOP_BACKTRACK:
! 2367: r = get_char_length_tree1(en->target, reg, len, level);
! 2368: break;
! 2369: default:
! 2370: break;
! 2371: }
! 2372: }
! 2373: break;
! 2374:
! 2375: case N_ANCHOR:
! 2376: break;
! 2377:
! 2378: default:
! 2379: r = GET_CHAR_LEN_VARLEN;
! 2380: break;
! 2381: }
! 2382:
! 2383: return r;
! 2384: }
! 2385:
! 2386: static int
! 2387: get_char_length_tree(Node* node, regex_t* reg, int* len)
! 2388: {
! 2389: return get_char_length_tree1(node, reg, len, 0);
! 2390: }
! 2391:
! 2392: /* x is not included y ==> 1 : 0 */
! 2393: static int
! 2394: is_not_included(Node* x, Node* y, regex_t* reg)
! 2395: {
! 2396: int i, len;
! 2397: OnigCodePoint code;
! 2398: UChar *p, c;
! 2399: int ytype;
! 2400:
! 2401: retry:
! 2402: ytype = NTYPE(y);
! 2403: switch (NTYPE(x)) {
! 2404: case N_CTYPE:
! 2405: {
! 2406: switch (ytype) {
! 2407: case N_CTYPE:
! 2408: switch (NCTYPE(x).type) {
! 2409: case CTYPE_WORD:
! 2410: if (NCTYPE(y).type == CTYPE_NOT_WORD)
! 2411: return 1;
! 2412: else
! 2413: return 0;
! 2414: break;
! 2415: case CTYPE_NOT_WORD:
! 2416: if (NCTYPE(y).type == CTYPE_WORD)
! 2417: return 1;
! 2418: else
! 2419: return 0;
! 2420: break;
! 2421: default:
! 2422: break;
! 2423: }
! 2424: break;
! 2425:
! 2426: case N_CCLASS:
! 2427: swap:
! 2428: {
! 2429: Node* tmp;
! 2430: tmp = x; x = y; y = tmp;
! 2431: goto retry;
! 2432: }
! 2433: break;
! 2434:
! 2435: case N_STRING:
! 2436: goto swap;
! 2437: break;
! 2438:
! 2439: default:
! 2440: break;
! 2441: }
! 2442: }
! 2443: break;
! 2444:
! 2445: case N_CCLASS:
! 2446: {
! 2447: CClassNode* xc = &(NCCLASS(x));
! 2448: switch (ytype) {
! 2449: case N_CTYPE:
! 2450: switch (NCTYPE(y).type) {
! 2451: case CTYPE_WORD:
! 2452: if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
! 2453: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 2454: if (BITSET_AT(xc->bs, i)) {
! 2455: if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
! 2456: }
! 2457: }
! 2458: return 1;
! 2459: }
! 2460: return 0;
! 2461: break;
! 2462: case CTYPE_NOT_WORD:
! 2463: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 2464: if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
! 2465: if (!IS_CCLASS_NOT(xc)) {
! 2466: if (BITSET_AT(xc->bs, i))
! 2467: return 0;
! 2468: }
! 2469: else {
! 2470: if (! BITSET_AT(xc->bs, i))
! 2471: return 0;
! 2472: }
! 2473: }
! 2474: }
! 2475: return 1;
! 2476: break;
! 2477:
! 2478: default:
! 2479: break;
! 2480: }
! 2481: break;
! 2482:
! 2483: case N_CCLASS:
! 2484: {
! 2485: int v;
! 2486: CClassNode* yc = &(NCCLASS(y));
! 2487:
! 2488: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 2489: v = BITSET_AT(xc->bs, i);
! 2490: if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
! 2491: (v == 0 && IS_CCLASS_NOT(xc))) {
! 2492: v = BITSET_AT(yc->bs, i);
! 2493: if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
! 2494: (v == 0 && IS_CCLASS_NOT(yc)))
! 2495: return 0;
! 2496: }
! 2497: }
! 2498: if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
! 2499: (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
! 2500: return 1;
! 2501: return 0;
! 2502: }
! 2503: break;
! 2504:
! 2505: case N_STRING:
! 2506: goto swap;
! 2507: break;
! 2508:
! 2509: default:
! 2510: break;
! 2511: }
! 2512: }
! 2513: break;
! 2514:
! 2515: case N_STRING:
! 2516: {
! 2517: StrNode* xs = &(NSTRING(x));
! 2518: if (NSTRING_LEN(x) == 0)
! 2519: break;
! 2520:
! 2521: c = *(xs->s);
! 2522: switch (ytype) {
! 2523: case N_CTYPE:
! 2524: switch (NCTYPE(y).type) {
! 2525: case CTYPE_WORD:
! 2526: return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
! 2527: break;
! 2528: case CTYPE_NOT_WORD:
! 2529: return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
! 2530: break;
! 2531: default:
! 2532: break;
! 2533: }
! 2534: break;
! 2535:
! 2536: case N_CCLASS:
! 2537: {
! 2538: CClassNode* cc = &(NCCLASS(y));
! 2539:
! 2540: code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
! 2541: xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
! 2542: return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
! 2543: }
! 2544: break;
! 2545:
! 2546: case N_STRING:
! 2547: {
! 2548: UChar *q;
! 2549: StrNode* ys = &(NSTRING(y));
! 2550: len = NSTRING_LEN(x);
! 2551: if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
! 2552: if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
! 2553: /* tiny version */
! 2554: return 0;
! 2555: }
! 2556: else {
! 2557: for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
! 2558: if (*p != *q) return 1;
! 2559: }
! 2560: }
! 2561: }
! 2562: break;
! 2563:
! 2564: default:
! 2565: break;
! 2566: }
! 2567: }
! 2568: break;
! 2569:
! 2570: default:
! 2571: break;
! 2572: }
! 2573:
! 2574: return 0;
! 2575: }
! 2576:
! 2577: static Node*
! 2578: get_head_value_node(Node* node, int exact, regex_t* reg)
! 2579: {
! 2580: Node* n = NULL_NODE;
! 2581:
! 2582: switch (NTYPE(node)) {
! 2583: case N_BACKREF:
! 2584: case N_ALT:
! 2585: case N_ANYCHAR:
! 2586: #ifdef USE_SUBEXP_CALL
! 2587: case N_CALL:
! 2588: #endif
! 2589: break;
! 2590:
! 2591: case N_CTYPE:
! 2592: case N_CCLASS:
! 2593: if (exact == 0) {
! 2594: n = node;
! 2595: }
! 2596: break;
! 2597:
! 2598: case N_LIST:
! 2599: n = get_head_value_node(NCONS(node).left, exact, reg);
! 2600: break;
! 2601:
! 2602: case N_STRING:
! 2603: {
! 2604: StrNode* sn = &(NSTRING(node));
! 2605:
! 2606: if (sn->end <= sn->s)
! 2607: break;
! 2608:
! 2609: if (exact != 0 &&
! 2610: !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
! 2611: #if 0
! 2612: UChar* tmp = sn->s;
! 2613: if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
! 2614: &tmp, sn->end))
! 2615: n = node;
! 2616: #endif
! 2617: }
! 2618: else {
! 2619: n = node;
! 2620: }
! 2621: }
! 2622: break;
! 2623:
! 2624: case N_QUANTIFIER:
! 2625: {
! 2626: QuantifierNode* qn = &(NQUANTIFIER(node));
! 2627: if (qn->lower > 0) {
! 2628: if (IS_NOT_NULL(qn->head_exact))
! 2629: n = qn->head_exact;
! 2630: else
! 2631: n = get_head_value_node(qn->target, exact, reg);
! 2632: }
! 2633: }
! 2634: break;
! 2635:
! 2636: case N_EFFECT:
! 2637: {
! 2638: EffectNode* en = &(NEFFECT(node));
! 2639: switch (en->type) {
! 2640: case EFFECT_OPTION:
! 2641: {
! 2642: OnigOptionType options = reg->options;
! 2643:
! 2644: reg->options = NEFFECT(node).option;
! 2645: n = get_head_value_node(NEFFECT(node).target, exact, reg);
! 2646: reg->options = options;
! 2647: }
! 2648: break;
! 2649:
! 2650: case EFFECT_MEMORY:
! 2651: case EFFECT_STOP_BACKTRACK:
! 2652: n = get_head_value_node(en->target, exact, reg);
! 2653: break;
! 2654: }
! 2655: }
! 2656: break;
! 2657:
! 2658: case N_ANCHOR:
! 2659: if (NANCHOR(node).type == ANCHOR_PREC_READ)
! 2660: n = get_head_value_node(NANCHOR(node).target, exact, reg);
! 2661: break;
! 2662:
! 2663: default:
! 2664: break;
! 2665: }
! 2666:
! 2667: return n;
! 2668: }
! 2669:
! 2670: static int
! 2671: check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
! 2672: {
! 2673: int type, r = 0;
! 2674:
! 2675: type = NTYPE(node);
! 2676: if ((type & type_mask) == 0)
! 2677: return 1;
! 2678:
! 2679: switch (type) {
! 2680: case N_LIST:
! 2681: case N_ALT:
! 2682: do {
! 2683: r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask);
! 2684: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 2685: break;
! 2686:
! 2687: case N_QUANTIFIER:
! 2688: r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
! 2689: anchor_mask);
! 2690: break;
! 2691:
! 2692: case N_EFFECT:
! 2693: {
! 2694: EffectNode* en = &(NEFFECT(node));
! 2695: if ((en->type & effect_mask) == 0)
! 2696: return 1;
! 2697:
! 2698: r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask);
! 2699: }
! 2700: break;
! 2701:
! 2702: case N_ANCHOR:
! 2703: type = NANCHOR(node).type;
! 2704: if ((type & anchor_mask) == 0)
! 2705: return 1;
! 2706:
! 2707: if (NANCHOR(node).target)
! 2708: r = check_type_tree(NANCHOR(node).target,
! 2709: type_mask, effect_mask, anchor_mask);
! 2710: break;
! 2711:
! 2712: default:
! 2713: break;
! 2714: }
! 2715: return r;
! 2716: }
! 2717:
! 2718: #ifdef USE_SUBEXP_CALL
! 2719:
! 2720: #define RECURSION_EXIST 1
! 2721: #define RECURSION_INFINITE 2
! 2722:
! 2723: static int
! 2724: subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
! 2725: {
! 2726: int type;
! 2727: int r = 0;
! 2728:
! 2729: type = NTYPE(node);
! 2730: switch (type) {
! 2731: case N_LIST:
! 2732: {
! 2733: Node *x;
! 2734: OnigDistance min;
! 2735: int ret;
! 2736:
! 2737: x = node;
! 2738: do {
! 2739: ret = subexp_inf_recursive_check(NCONS(x).left, env, head);
! 2740: if (ret < 0 || ret == RECURSION_INFINITE) return ret;
! 2741: r |= ret;
! 2742: if (head) {
! 2743: ret = get_min_match_length(NCONS(x).left, &min, env);
! 2744: if (ret != 0) return ret;
! 2745: if (min != 0) head = 0;
! 2746: }
! 2747: } while (IS_NOT_NULL(x = NCONS(x).right));
! 2748: }
! 2749: break;
! 2750:
! 2751: case N_ALT:
! 2752: {
! 2753: int ret;
! 2754: r = RECURSION_EXIST;
! 2755: do {
! 2756: ret = subexp_inf_recursive_check(NCONS(node).left, env, head);
! 2757: if (ret < 0 || ret == RECURSION_INFINITE) return ret;
! 2758: r &= ret;
! 2759: } while (IS_NOT_NULL(node = NCONS(node).right));
! 2760: }
! 2761: break;
! 2762:
! 2763: case N_QUANTIFIER:
! 2764: r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
! 2765: if (r == RECURSION_EXIST) {
! 2766: if (NQUANTIFIER(node).lower == 0) r = 0;
! 2767: }
! 2768: break;
! 2769:
! 2770: case N_ANCHOR:
! 2771: {
! 2772: AnchorNode* an = &(NANCHOR(node));
! 2773: switch (an->type) {
! 2774: case ANCHOR_PREC_READ:
! 2775: case ANCHOR_PREC_READ_NOT:
! 2776: case ANCHOR_LOOK_BEHIND:
! 2777: case ANCHOR_LOOK_BEHIND_NOT:
! 2778: r = subexp_inf_recursive_check(an->target, env, head);
! 2779: break;
! 2780: }
! 2781: }
! 2782: break;
! 2783:
! 2784: case N_CALL:
! 2785: r = subexp_inf_recursive_check(NCALL(node).target, env, head);
! 2786: break;
! 2787:
! 2788: case N_EFFECT:
! 2789: if (IS_EFFECT_MARK2(&(NEFFECT(node))))
! 2790: return 0;
! 2791: else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
! 2792: return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
! 2793: else {
! 2794: SET_EFFECT_STATUS(node, NST_MARK2);
! 2795: r = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
! 2796: CLEAR_EFFECT_STATUS(node, NST_MARK2);
! 2797: }
! 2798: break;
! 2799:
! 2800: default:
! 2801: break;
! 2802: }
! 2803:
! 2804: return r;
! 2805: }
! 2806:
! 2807: static int
! 2808: subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
! 2809: {
! 2810: int type;
! 2811: int r = 0;
! 2812:
! 2813: type = NTYPE(node);
! 2814: switch (type) {
! 2815: case N_LIST:
! 2816: case N_ALT:
! 2817: do {
! 2818: r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
! 2819: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 2820: break;
! 2821:
! 2822: case N_QUANTIFIER:
! 2823: r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
! 2824: break;
! 2825:
! 2826: case N_ANCHOR:
! 2827: {
! 2828: AnchorNode* an = &(NANCHOR(node));
! 2829: switch (an->type) {
! 2830: case ANCHOR_PREC_READ:
! 2831: case ANCHOR_PREC_READ_NOT:
! 2832: case ANCHOR_LOOK_BEHIND:
! 2833: case ANCHOR_LOOK_BEHIND_NOT:
! 2834: r = subexp_inf_recursive_check_trav(an->target, env);
! 2835: break;
! 2836: }
! 2837: }
! 2838: break;
! 2839:
! 2840: case N_EFFECT:
! 2841: {
! 2842: EffectNode* en = &(NEFFECT(node));
! 2843:
! 2844: if (IS_EFFECT_RECURSION(en)) {
! 2845: SET_EFFECT_STATUS(node, NST_MARK1);
! 2846: r = subexp_inf_recursive_check(en->target, env, 1);
! 2847: if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
! 2848: CLEAR_EFFECT_STATUS(node, NST_MARK1);
! 2849: }
! 2850: r = subexp_inf_recursive_check_trav(en->target, env);
! 2851: }
! 2852:
! 2853: break;
! 2854:
! 2855: default:
! 2856: break;
! 2857: }
! 2858:
! 2859: return r;
! 2860: }
! 2861:
! 2862: static int
! 2863: subexp_recursive_check(Node* node)
! 2864: {
! 2865: int type;
! 2866: int r = 0;
! 2867:
! 2868: type = NTYPE(node);
! 2869: switch (type) {
! 2870: case N_LIST:
! 2871: case N_ALT:
! 2872: do {
! 2873: r |= subexp_recursive_check(NCONS(node).left);
! 2874: } while (IS_NOT_NULL(node = NCONS(node).right));
! 2875: break;
! 2876:
! 2877: case N_QUANTIFIER:
! 2878: r = subexp_recursive_check(NQUANTIFIER(node).target);
! 2879: break;
! 2880:
! 2881: case N_ANCHOR:
! 2882: {
! 2883: AnchorNode* an = &(NANCHOR(node));
! 2884: switch (an->type) {
! 2885: case ANCHOR_PREC_READ:
! 2886: case ANCHOR_PREC_READ_NOT:
! 2887: case ANCHOR_LOOK_BEHIND:
! 2888: case ANCHOR_LOOK_BEHIND_NOT:
! 2889: r = subexp_recursive_check(an->target);
! 2890: break;
! 2891: }
! 2892: }
! 2893: break;
! 2894:
! 2895: case N_CALL:
! 2896: r = subexp_recursive_check(NCALL(node).target);
! 2897: if (r != 0) SET_CALL_RECURSION(node);
! 2898: break;
! 2899:
! 2900: case N_EFFECT:
! 2901: if (IS_EFFECT_MARK2(&(NEFFECT(node))))
! 2902: return 0;
! 2903: else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
! 2904: return 1; /* recursion */
! 2905: else {
! 2906: SET_EFFECT_STATUS(node, NST_MARK2);
! 2907: r = subexp_recursive_check(NEFFECT(node).target);
! 2908: CLEAR_EFFECT_STATUS(node, NST_MARK2);
! 2909: }
! 2910: break;
! 2911:
! 2912: default:
! 2913: break;
! 2914: }
! 2915:
! 2916: return r;
! 2917: }
! 2918:
! 2919:
! 2920: static int
! 2921: subexp_recursive_check_trav(Node* node, ScanEnv* env)
! 2922: {
! 2923: #define FOUND_CALLED_NODE 1
! 2924:
! 2925: int type;
! 2926: int r = 0;
! 2927:
! 2928: type = NTYPE(node);
! 2929: switch (type) {
! 2930: case N_LIST:
! 2931: case N_ALT:
! 2932: {
! 2933: int ret;
! 2934: do {
! 2935: ret = subexp_recursive_check_trav(NCONS(node).left, env);
! 2936: if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
! 2937: else if (ret < 0) return ret;
! 2938: } while (IS_NOT_NULL(node = NCONS(node).right));
! 2939: }
! 2940: break;
! 2941:
! 2942: case N_QUANTIFIER:
! 2943: r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
! 2944: if (NQUANTIFIER(node).upper == 0) {
! 2945: if (r == FOUND_CALLED_NODE)
! 2946: NQUANTIFIER(node).is_refered = 1;
! 2947: }
! 2948: break;
! 2949:
! 2950: case N_ANCHOR:
! 2951: {
! 2952: AnchorNode* an = &(NANCHOR(node));
! 2953: switch (an->type) {
! 2954: case ANCHOR_PREC_READ:
! 2955: case ANCHOR_PREC_READ_NOT:
! 2956: case ANCHOR_LOOK_BEHIND:
! 2957: case ANCHOR_LOOK_BEHIND_NOT:
! 2958: r = subexp_recursive_check_trav(an->target, env);
! 2959: break;
! 2960: }
! 2961: }
! 2962: break;
! 2963:
! 2964: case N_EFFECT:
! 2965: {
! 2966: EffectNode* en = &(NEFFECT(node));
! 2967:
! 2968: if (! IS_EFFECT_RECURSION(en)) {
! 2969: if (IS_EFFECT_CALLED(en)) {
! 2970: SET_EFFECT_STATUS(node, NST_MARK1);
! 2971: r = subexp_recursive_check(en->target);
! 2972: if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION);
! 2973: CLEAR_EFFECT_STATUS(node, NST_MARK1);
! 2974: }
! 2975: }
! 2976: r = subexp_recursive_check_trav(en->target, env);
! 2977: if (IS_EFFECT_CALLED(en))
! 2978: r |= FOUND_CALLED_NODE;
! 2979: }
! 2980: break;
! 2981:
! 2982: default:
! 2983: break;
! 2984: }
! 2985:
! 2986: return r;
! 2987: }
! 2988:
! 2989: static int
! 2990: setup_subexp_call(Node* node, ScanEnv* env)
! 2991: {
! 2992: int type;
! 2993: int r = 0;
! 2994:
! 2995: type = NTYPE(node);
! 2996: switch (type) {
! 2997: case N_LIST:
! 2998: do {
! 2999: r = setup_subexp_call(NCONS(node).left, env);
! 3000: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 3001: break;
! 3002:
! 3003: case N_ALT:
! 3004: do {
! 3005: r = setup_subexp_call(NCONS(node).left, env);
! 3006: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 3007: break;
! 3008:
! 3009: case N_QUANTIFIER:
! 3010: r = setup_subexp_call(NQUANTIFIER(node).target, env);
! 3011: break;
! 3012: case N_EFFECT:
! 3013: r = setup_subexp_call(NEFFECT(node).target, env);
! 3014: break;
! 3015:
! 3016: case N_CALL:
! 3017: {
! 3018: int n, num, *refs;
! 3019: UChar *p;
! 3020: CallNode* cn = &(NCALL(node));
! 3021: Node** nodes = SCANENV_MEM_NODES(env);
! 3022:
! 3023: #ifdef USE_NAMED_GROUP
! 3024: n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
! 3025: #else
! 3026: n = -1;
! 3027: #endif
! 3028: if (n <= 0) {
! 3029: /* name not found, check group number. (?*ddd) */
! 3030: p = cn->name;
! 3031: num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
! 3032: if (num <= 0 || p != cn->name_end) {
! 3033: onig_scan_env_set_error_string(env,
! 3034: ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
! 3035: return ONIGERR_UNDEFINED_NAME_REFERENCE;
! 3036: }
! 3037: #ifdef USE_NAMED_GROUP
! 3038: if (env->num_named > 0 &&
! 3039: IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
! 3040: !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
! 3041: return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
! 3042: }
! 3043: #endif
! 3044: if (num > env->num_mem) {
! 3045: onig_scan_env_set_error_string(env,
! 3046: ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
! 3047: return ONIGERR_UNDEFINED_GROUP_REFERENCE;
! 3048: }
! 3049: cn->ref_num = num;
! 3050: goto set_call_attr;
! 3051: }
! 3052: else if (n > 1) {
! 3053: onig_scan_env_set_error_string(env,
! 3054: ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
! 3055: return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
! 3056: }
! 3057: else {
! 3058: cn->ref_num = refs[0];
! 3059: set_call_attr:
! 3060: cn->target = nodes[cn->ref_num];
! 3061: if (IS_NULL(cn->target)) {
! 3062: onig_scan_env_set_error_string(env,
! 3063: ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
! 3064: return ONIGERR_UNDEFINED_NAME_REFERENCE;
! 3065: }
! 3066: SET_EFFECT_STATUS(cn->target, NST_CALLED);
! 3067: BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
! 3068: cn->unset_addr_list = env->unset_addr_list;
! 3069: }
! 3070: }
! 3071: break;
! 3072:
! 3073: case N_ANCHOR:
! 3074: {
! 3075: AnchorNode* an = &(NANCHOR(node));
! 3076:
! 3077: switch (an->type) {
! 3078: case ANCHOR_PREC_READ:
! 3079: case ANCHOR_PREC_READ_NOT:
! 3080: case ANCHOR_LOOK_BEHIND:
! 3081: case ANCHOR_LOOK_BEHIND_NOT:
! 3082: r = setup_subexp_call(an->target, env);
! 3083: break;
! 3084: }
! 3085: }
! 3086: break;
! 3087:
! 3088: default:
! 3089: break;
! 3090: }
! 3091:
! 3092: return r;
! 3093: }
! 3094: #endif
! 3095:
! 3096: /* divide different length alternatives in look-behind.
! 3097: (?<=A|B) ==> (?<=A)|(?<=B)
! 3098: (?<!A|B) ==> (?<!A)(?<!B)
! 3099: */
! 3100: static int
! 3101: divide_look_behind_alternatives(Node* node)
! 3102: {
! 3103: Node tmp_node;
! 3104: Node *head, *np, *insert_node;
! 3105: AnchorNode* an = &(NANCHOR(node));
! 3106: int anc_type = an->type;
! 3107:
! 3108: head = an->target;
! 3109: np = NCONS(head).left;
! 3110: tmp_node = *node; *node = *head; *head = tmp_node;
! 3111: NCONS(node).left = head;
! 3112: NANCHOR(head).target = np;
! 3113:
! 3114: np = node;
! 3115: while ((np = NCONS(np).right) != NULL_NODE) {
! 3116: insert_node = onig_node_new_anchor(anc_type);
! 3117: CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
! 3118: NANCHOR(insert_node).target = NCONS(np).left;
! 3119: NCONS(np).left = insert_node;
! 3120: }
! 3121:
! 3122: if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
! 3123: np = node;
! 3124: do {
! 3125: np->type = N_LIST; /* alt -> list */
! 3126: } while ((np = NCONS(np).right) != NULL_NODE);
! 3127: }
! 3128: return 0;
! 3129: }
! 3130:
! 3131: static int
! 3132: setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
! 3133: {
! 3134: int r, len;
! 3135: AnchorNode* an = &(NANCHOR(node));
! 3136:
! 3137: r = get_char_length_tree(an->target, reg, &len);
! 3138: if (r == 0)
! 3139: an->char_len = len;
! 3140: else if (r == GET_CHAR_LEN_VARLEN)
! 3141: r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
! 3142: else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
! 3143: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
! 3144: r = divide_look_behind_alternatives(node);
! 3145: else
! 3146: r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
! 3147: }
! 3148:
! 3149: return r;
! 3150: }
! 3151:
! 3152: static int
! 3153: next_setup(Node* node, Node* next_node, regex_t* reg)
! 3154: {
! 3155: int type;
! 3156:
! 3157: retry:
! 3158: type = NTYPE(node);
! 3159: if (type == N_QUANTIFIER) {
! 3160: QuantifierNode* qn = &(NQUANTIFIER(node));
! 3161: if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
! 3162: #ifdef USE_QUANTIFIER_PEEK_NEXT
! 3163: qn->next_head_exact = get_head_value_node(next_node, 1, reg);
! 3164: #endif
! 3165: /* automatic posseivation a*b ==> (?>a*)b */
! 3166: if (qn->lower <= 1) {
! 3167: int ttype = NTYPE(qn->target);
! 3168: if (IS_NODE_TYPE_SIMPLE(ttype)) {
! 3169: Node *x, *y;
! 3170: x = get_head_value_node(qn->target, 0, reg);
! 3171: if (IS_NOT_NULL(x)) {
! 3172: y = get_head_value_node(next_node, 0, reg);
! 3173: if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
! 3174: Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
! 3175: CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
! 3176: SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
! 3177: swap_node(node, en);
! 3178: NEFFECT(node).target = en;
! 3179: }
! 3180: }
! 3181: }
! 3182: }
! 3183: }
! 3184: }
! 3185: else if (type == N_EFFECT) {
! 3186: EffectNode* en = &(NEFFECT(node));
! 3187: if (en->type == EFFECT_MEMORY) {
! 3188: node = en->target;
! 3189: goto retry;
! 3190: }
! 3191: }
! 3192: return 0;
! 3193: }
! 3194:
! 3195:
! 3196: static int
! 3197: divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
! 3198: UChar* prev_start, UChar* prev,
! 3199: UChar* end, Node*** tailp, Node** root)
! 3200: {
! 3201: UChar *tmp, *wp;
! 3202: Node* snode;
! 3203:
! 3204: if (prev_ambig != 0) {
! 3205: tmp = prev_start;
! 3206: wp = prev_start;
! 3207: while (tmp < prev) {
! 3208: wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
! 3209: &tmp, end, wp);
! 3210: }
! 3211: snode = onig_node_new_str(prev_start, wp);
! 3212: CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
! 3213: NSTRING_SET_AMBIG(snode);
! 3214: if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
! 3215: }
! 3216: else {
! 3217: snode = onig_node_new_str(prev_start, prev);
! 3218: CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
! 3219: }
! 3220:
! 3221: if (*tailp == (Node** )0) {
! 3222: *root = onig_node_new_list(snode, NULL);
! 3223: CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
! 3224: *tailp = &(NCONS(*root).right);
! 3225: }
! 3226: else {
! 3227: **tailp = onig_node_new_list(snode, NULL);
! 3228: CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
! 3229: *tailp = &(NCONS(**tailp).right);
! 3230: }
! 3231:
! 3232: return 0;
! 3233: }
! 3234:
! 3235: static int
! 3236: divide_ambig_string_node(Node* node, regex_t* reg)
! 3237: {
! 3238: StrNode* sn = &NSTRING(node);
! 3239: int ambig, prev_ambig;
! 3240: UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
! 3241: Node *root = NULL_NODE;
! 3242: Node **tailp = (Node** )0;
! 3243: int r;
! 3244:
! 3245: start = prev_start = p = sn->s;
! 3246: end = sn->end;
! 3247: if (p >= end) return 0;
! 3248:
! 3249: prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
! 3250:
! 3251: while (p < end) {
! 3252: prev = p;
! 3253: if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
! 3254: reg->ambig_flag, &p, end))) {
! 3255:
! 3256: r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
! 3257: end, &tailp, &root);
! 3258: if (r != 0) return r;
! 3259:
! 3260: prev_ambig = ambig;
! 3261: prev_start = prev;
! 3262: }
! 3263: }
! 3264:
! 3265: if (prev_start == start) {
! 3266: if (prev_ambig != 0) {
! 3267: NSTRING_SET_AMBIG(node);
! 3268: tmp = start;
! 3269: wp = start;
! 3270: while (tmp < end) {
! 3271: wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
! 3272: &tmp, end, wp);
! 3273: }
! 3274: if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
! 3275: sn->end = wp;
! 3276: }
! 3277: }
! 3278: else {
! 3279: r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
! 3280: end, &tailp, &root);
! 3281: if (r != 0) return r;
! 3282:
! 3283: swap_node(node, root);
! 3284: onig_node_str_clear(root); /* should be after swap! */
! 3285: onig_node_free(root); /* free original string node */
! 3286: }
! 3287:
! 3288: return 0;
! 3289: }
! 3290:
! 3291: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 3292:
! 3293: #define CEC_THRES_NUM_BIG_REPEAT 512
! 3294: #define CEC_INFINITE_NUM 0x7fffffff
! 3295:
! 3296: #define CEC_IN_INFINITE_REPEAT (1<<0)
! 3297: #define CEC_IN_FINITE_REPEAT (1<<1)
! 3298: #define CEC_CONT_BIG_REPEAT (1<<2)
! 3299:
! 3300: static int
! 3301: setup_comb_exp_check(Node* node, int state, ScanEnv* env)
! 3302: {
! 3303: int type;
! 3304: int r = state;
! 3305:
! 3306: type = NTYPE(node);
! 3307: switch (type) {
! 3308: case N_LIST:
! 3309: {
! 3310: Node* prev = NULL_NODE;
! 3311: do {
! 3312: r = setup_comb_exp_check(NCONS(node).left, r, env);
! 3313: prev = NCONS(node).left;
! 3314: } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
! 3315: }
! 3316: break;
! 3317:
! 3318: case N_ALT:
! 3319: {
! 3320: int ret;
! 3321: do {
! 3322: ret = setup_comb_exp_check(NCONS(node).left, state, env);
! 3323: r |= ret;
! 3324: } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
! 3325: }
! 3326: break;
! 3327:
! 3328: case N_QUANTIFIER:
! 3329: {
! 3330: int child_state = state;
! 3331: int add_state = 0;
! 3332: QuantifierNode* qn = &(NQUANTIFIER(node));
! 3333: Node* target = qn->target;
! 3334: int var_num;
! 3335:
! 3336: if (! IS_REPEAT_INFINITE(qn->upper)) {
! 3337: if (qn->upper > 1) {
! 3338: /* {0,1}, {1,1} are allowed */
! 3339: child_state |= CEC_IN_FINITE_REPEAT;
! 3340:
! 3341: /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
! 3342: if (env->backrefed_mem == 0) {
! 3343: if (NTYPE(qn->target) == N_EFFECT) {
! 3344: EffectNode* en = &(NEFFECT(qn->target));
! 3345: if (en->type == EFFECT_MEMORY) {
! 3346: if (NTYPE(en->target) == N_QUANTIFIER) {
! 3347: QuantifierNode* q = &(NQUANTIFIER(en->target));
! 3348: if (IS_REPEAT_INFINITE(q->upper)
! 3349: && q->greedy == qn->greedy) {
! 3350: qn->upper = (qn->lower == 0 ? 1 : qn->lower);
! 3351: if (qn->upper == 1)
! 3352: child_state = state;
! 3353: }
! 3354: }
! 3355: }
! 3356: }
! 3357: }
! 3358: }
! 3359: }
! 3360:
! 3361: if (state & CEC_IN_FINITE_REPEAT) {
! 3362: qn->comb_exp_check_num = -1;
! 3363: }
! 3364: else {
! 3365: if (IS_REPEAT_INFINITE(qn->upper)) {
! 3366: var_num = CEC_INFINITE_NUM;
! 3367: child_state |= CEC_IN_INFINITE_REPEAT;
! 3368: }
! 3369: else {
! 3370: var_num = qn->upper - qn->lower;
! 3371: }
! 3372:
! 3373: if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
! 3374: add_state |= CEC_CONT_BIG_REPEAT;
! 3375:
! 3376: if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
! 3377: ((state & CEC_CONT_BIG_REPEAT) != 0 &&
! 3378: var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
! 3379: if (qn->comb_exp_check_num == 0) {
! 3380: env->num_comb_exp_check++;
! 3381: qn->comb_exp_check_num = env->num_comb_exp_check;
! 3382: if (env->curr_max_regnum > env->comb_exp_max_regnum)
! 3383: env->comb_exp_max_regnum = env->curr_max_regnum;
! 3384: }
! 3385: }
! 3386: }
! 3387:
! 3388: r = setup_comb_exp_check(target, child_state, env);
! 3389: r |= add_state;
! 3390: }
! 3391: break;
! 3392:
! 3393: case N_EFFECT:
! 3394: {
! 3395: EffectNode* en = &(NEFFECT(node));
! 3396:
! 3397: switch (en->type) {
! 3398: case EFFECT_MEMORY:
! 3399: {
! 3400: if (env->curr_max_regnum < en->regnum)
! 3401: env->curr_max_regnum = en->regnum;
! 3402:
! 3403: r = setup_comb_exp_check(en->target, state, env);
! 3404: }
! 3405: break;
! 3406:
! 3407: default:
! 3408: r = setup_comb_exp_check(en->target, state, env);
! 3409: break;
! 3410: }
! 3411: }
! 3412: break;
! 3413:
! 3414: #ifdef USE_SUBEXP_CALL
! 3415: case N_CALL:
! 3416: if (IS_CALL_RECURSION(&(NCALL(node))))
! 3417: env->has_recursion = 1;
! 3418: else
! 3419: r = setup_comb_exp_check(NCALL(node).target, state, env);
! 3420: break;
! 3421: #endif
! 3422:
! 3423: default:
! 3424: break;
! 3425: }
! 3426:
! 3427: return r;
! 3428: }
! 3429: #endif
! 3430:
! 3431: #define IN_ALT (1<<0)
! 3432: #define IN_NOT (1<<1)
! 3433: #define IN_REPEAT (1<<2)
! 3434: #define IN_VAR_REPEAT (1<<3)
! 3435:
! 3436: /* setup_tree does the following work.
! 3437: 1. check empty loop. (set qn->target_empty_info)
! 3438: 2. expand ignore-case in char class.
! 3439: 3. set memory status bit flags. (reg->mem_stats)
! 3440: 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
! 3441: 5. find invalid patterns in look-behind.
! 3442: 6. expand repeated string.
! 3443: */
! 3444: static int
! 3445: setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
! 3446: {
! 3447: int type;
! 3448: int r = 0;
! 3449:
! 3450: type = NTYPE(node);
! 3451: switch (type) {
! 3452: case N_LIST:
! 3453: {
! 3454: Node* prev = NULL_NODE;
! 3455: do {
! 3456: r = setup_tree(NCONS(node).left, reg, state, env);
! 3457: if (IS_NOT_NULL(prev) && r == 0) {
! 3458: r = next_setup(prev, NCONS(node).left, reg);
! 3459: }
! 3460: prev = NCONS(node).left;
! 3461: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 3462: }
! 3463: break;
! 3464:
! 3465: case N_ALT:
! 3466: do {
! 3467: r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
! 3468: } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
! 3469: break;
! 3470:
! 3471: case N_CCLASS:
! 3472: break;
! 3473:
! 3474: case N_STRING:
! 3475: if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
! 3476: r = divide_ambig_string_node(node, reg);
! 3477: }
! 3478: break;
! 3479:
! 3480: case N_CTYPE:
! 3481: case N_ANYCHAR:
! 3482: break;
! 3483:
! 3484: #ifdef USE_SUBEXP_CALL
! 3485: case N_CALL:
! 3486: break;
! 3487: #endif
! 3488:
! 3489: case N_BACKREF:
! 3490: {
! 3491: int i;
! 3492: int* p;
! 3493: Node** nodes = SCANENV_MEM_NODES(env);
! 3494: BackrefNode* br = &(NBACKREF(node));
! 3495: p = BACKREFS_P(br);
! 3496: for (i = 0; i < br->back_num; i++) {
! 3497: if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
! 3498: BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
! 3499: BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
! 3500: #ifdef USE_BACKREF_AT_LEVEL
! 3501: if (IS_BACKREF_NEST_LEVEL(br)) {
! 3502: BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
! 3503: }
! 3504: #endif
! 3505: SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
! 3506: }
! 3507: }
! 3508: break;
! 3509:
! 3510: case N_QUANTIFIER:
! 3511: {
! 3512: OnigDistance d;
! 3513: QuantifierNode* qn = &(NQUANTIFIER(node));
! 3514: Node* target = qn->target;
! 3515:
! 3516: if ((state & IN_REPEAT) != 0) {
! 3517: qn->state |= NST_IN_REPEAT;
! 3518: }
! 3519:
! 3520: if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
! 3521: r = get_min_match_length(target, &d, env);
! 3522: if (r) break;
! 3523: if (d == 0) {
! 3524: qn->target_empty_info = NQ_TARGET_IS_EMPTY;
! 3525: #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
! 3526: r = quantifiers_memory_node_info(target);
! 3527: if (r < 0) break;
! 3528: if (r > 0) {
! 3529: qn->target_empty_info = r;
! 3530: }
! 3531: #endif
! 3532: #if 0
! 3533: r = get_max_match_length(target, &d, env);
! 3534: if (r == 0 && d == 0) {
! 3535: /* ()* ==> ()?, ()+ ==> () */
! 3536: qn->upper = 1;
! 3537: if (qn->lower > 1) qn->lower = 1;
! 3538: if (NTYPE(target) == N_STRING) {
! 3539: qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
! 3540: }
! 3541: }
! 3542: #endif
! 3543: }
! 3544: }
! 3545:
! 3546: state |= IN_REPEAT;
! 3547: if (qn->lower != qn->upper)
! 3548: state |= IN_VAR_REPEAT;
! 3549: r = setup_tree(target, reg, state, env);
! 3550: if (r) break;
! 3551:
! 3552: /* expand string */
! 3553: #define EXPAND_STRING_MAX_LENGTH 100
! 3554: if (NTYPE(target) == N_STRING) {
! 3555: if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
! 3556: qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
! 3557: int len = NSTRING_LEN(target);
! 3558: StrNode* sn = &(NSTRING(target));
! 3559:
! 3560: if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
! 3561: int i, n = qn->lower;
! 3562: onig_node_conv_to_str_node(node, NSTRING(target).flag);
! 3563: for (i = 0; i < n; i++) {
! 3564: r = onig_node_str_cat(node, sn->s, sn->end);
! 3565: if (r) break;
! 3566: }
! 3567: onig_node_free(target);
! 3568: break; /* break case N_QUANTIFIER: */
! 3569: }
! 3570: }
! 3571: }
! 3572:
! 3573: #ifdef USE_OP_PUSH_OR_JUMP_EXACT
! 3574: if (qn->greedy && (qn->target_empty_info != 0)) {
! 3575: if (NTYPE(target) == N_QUANTIFIER) {
! 3576: QuantifierNode* tqn = &(NQUANTIFIER(target));
! 3577: if (IS_NOT_NULL(tqn->head_exact)) {
! 3578: qn->head_exact = tqn->head_exact;
! 3579: tqn->head_exact = NULL;
! 3580: }
! 3581: }
! 3582: else {
! 3583: qn->head_exact = get_head_value_node(qn->target, 1, reg);
! 3584: }
! 3585: }
! 3586: #endif
! 3587: }
! 3588: break;
! 3589:
! 3590: case N_EFFECT:
! 3591: {
! 3592: EffectNode* en = &(NEFFECT(node));
! 3593:
! 3594: switch (en->type) {
! 3595: case EFFECT_OPTION:
! 3596: {
! 3597: OnigOptionType options = reg->options;
! 3598: reg->options = NEFFECT(node).option;
! 3599: r = setup_tree(NEFFECT(node).target, reg, state, env);
! 3600: reg->options = options;
! 3601: }
! 3602: break;
! 3603:
! 3604: case EFFECT_MEMORY:
! 3605: if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
! 3606: BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
! 3607: /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
! 3608: }
! 3609: r = setup_tree(en->target, reg, state, env);
! 3610: break;
! 3611:
! 3612: case EFFECT_STOP_BACKTRACK:
! 3613: {
! 3614: Node* target = en->target;
! 3615: r = setup_tree(target, reg, state, env);
! 3616: if (NTYPE(target) == N_QUANTIFIER) {
! 3617: QuantifierNode* tqn = &(NQUANTIFIER(target));
! 3618: if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
! 3619: tqn->greedy != 0) { /* (?>a*), a*+ etc... */
! 3620: int qtype = NTYPE(tqn->target);
! 3621: if (IS_NODE_TYPE_SIMPLE(qtype))
! 3622: SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
! 3623: }
! 3624: }
! 3625: }
! 3626: break;
! 3627: }
! 3628: }
! 3629: break;
! 3630:
! 3631: case N_ANCHOR:
! 3632: {
! 3633: AnchorNode* an = &(NANCHOR(node));
! 3634:
! 3635: switch (an->type) {
! 3636: case ANCHOR_PREC_READ:
! 3637: r = setup_tree(an->target, reg, state, env);
! 3638: break;
! 3639: case ANCHOR_PREC_READ_NOT:
! 3640: r = setup_tree(an->target, reg, (state | IN_NOT), env);
! 3641: break;
! 3642:
! 3643: /* allowed node types in look-behind */
! 3644: #define ALLOWED_TYPE_IN_LB \
! 3645: ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
! 3646: N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
! 3647:
! 3648: #define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY )
! 3649: #define ALLOWED_EFFECT_IN_LB_NOT 0
! 3650:
! 3651: #define ALLOWED_ANCHOR_IN_LB \
! 3652: ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
! 3653: #define ALLOWED_ANCHOR_IN_LB_NOT \
! 3654: ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
! 3655:
! 3656: case ANCHOR_LOOK_BEHIND:
! 3657: {
! 3658: r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
! 3659: ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
! 3660: if (r < 0) return r;
! 3661: if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
! 3662: r = setup_look_behind(node, reg, env);
! 3663: if (r != 0) return r;
! 3664: r = setup_tree(an->target, reg, state, env);
! 3665: }
! 3666: break;
! 3667:
! 3668: case ANCHOR_LOOK_BEHIND_NOT:
! 3669: {
! 3670: r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
! 3671: ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
! 3672: if (r < 0) return r;
! 3673: if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
! 3674: r = setup_look_behind(node, reg, env);
! 3675: if (r != 0) return r;
! 3676: r = setup_tree(an->target, reg, (state | IN_NOT), env);
! 3677: }
! 3678: break;
! 3679: }
! 3680: }
! 3681: break;
! 3682:
! 3683: default:
! 3684: break;
! 3685: }
! 3686:
! 3687: return r;
! 3688: }
! 3689:
! 3690: /* set skip map for Boyer-Moor search */
! 3691: static int
! 3692: set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
! 3693: UChar skip[], int** int_skip)
! 3694: {
! 3695: int i, len;
! 3696:
! 3697: len = end - s;
! 3698: if (len < ONIG_CHAR_TABLE_SIZE) {
! 3699: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
! 3700:
! 3701: for (i = 0; i < len - 1; i++)
! 3702: skip[s[i]] = len - 1 - i;
! 3703: }
! 3704: else {
! 3705: if (IS_NULL(*int_skip)) {
! 3706: *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
! 3707: if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
! 3708: }
! 3709: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
! 3710:
! 3711: for (i = 0; i < len - 1; i++)
! 3712: (*int_skip)[s[i]] = len - 1 - i;
! 3713: }
! 3714: return 0;
! 3715: }
! 3716:
! 3717: #define OPT_EXACT_MAXLEN 24
! 3718:
! 3719: typedef struct {
! 3720: OnigDistance min; /* min byte length */
! 3721: OnigDistance max; /* max byte length */
! 3722: } MinMaxLen;
! 3723:
! 3724: typedef struct {
! 3725: MinMaxLen mmd;
! 3726: OnigEncoding enc;
! 3727: OnigOptionType options;
! 3728: OnigAmbigType ambig_flag;
! 3729: ScanEnv* scan_env;
! 3730: } OptEnv;
! 3731:
! 3732: typedef struct {
! 3733: int left_anchor;
! 3734: int right_anchor;
! 3735: } OptAncInfo;
! 3736:
! 3737: typedef struct {
! 3738: MinMaxLen mmd; /* info position */
! 3739: OptAncInfo anc;
! 3740:
! 3741: int reach_end;
! 3742: int ignore_case;
! 3743: int len;
! 3744: UChar s[OPT_EXACT_MAXLEN];
! 3745: } OptExactInfo;
! 3746:
! 3747: typedef struct {
! 3748: MinMaxLen mmd; /* info position */
! 3749: OptAncInfo anc;
! 3750:
! 3751: int value; /* weighted value */
! 3752: UChar map[ONIG_CHAR_TABLE_SIZE];
! 3753: } OptMapInfo;
! 3754:
! 3755: typedef struct {
! 3756: MinMaxLen len;
! 3757:
! 3758: OptAncInfo anc;
! 3759: OptExactInfo exb; /* boundary */
! 3760: OptExactInfo exm; /* middle */
! 3761: OptExactInfo expr; /* prec read (?=...) */
! 3762:
! 3763: OptMapInfo map; /* boundary */
! 3764: } NodeOptInfo;
! 3765:
! 3766:
! 3767: static int
! 3768: map_position_value(OnigEncoding enc, int i)
! 3769: {
! 3770: static const short int ByteValTable[] = {
! 3771: 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
! 3772: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
! 3773: 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
! 3774: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
! 3775: 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
! 3776: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
! 3777: 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
! 3778: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
! 3779: };
! 3780:
! 3781: if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
! 3782: if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
! 3783: return 20;
! 3784: else
! 3785: return (int )ByteValTable[i];
! 3786: }
! 3787: else
! 3788: return 4; /* Take it easy. */
! 3789: }
! 3790:
! 3791: static int
! 3792: distance_value(MinMaxLen* mm)
! 3793: {
! 3794: /* 1000 / (min-max-dist + 1) */
! 3795: static const short int dist_vals[] = {
! 3796: 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
! 3797: 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
! 3798: 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
! 3799: 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
! 3800: 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
! 3801: 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
! 3802: 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
! 3803: 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
! 3804: 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
! 3805: 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
! 3806: };
! 3807:
! 3808: int d;
! 3809:
! 3810: if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
! 3811:
! 3812: d = mm->max - mm->min;
! 3813: if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
! 3814: /* return dist_vals[d] * 16 / (mm->min + 12); */
! 3815: return (int )dist_vals[d];
! 3816: else
! 3817: return 1;
! 3818: }
! 3819:
! 3820: static int
! 3821: comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
! 3822: {
! 3823: if (v2 <= 0) return -1;
! 3824: if (v1 <= 0) return 1;
! 3825:
! 3826: v1 *= distance_value(d1);
! 3827: v2 *= distance_value(d2);
! 3828:
! 3829: if (v2 > v1) return 1;
! 3830: if (v2 < v1) return -1;
! 3831:
! 3832: if (d2->min < d1->min) return 1;
! 3833: if (d2->min > d1->min) return -1;
! 3834: return 0;
! 3835: }
! 3836:
! 3837: static int
! 3838: is_equal_mml(MinMaxLen* a, MinMaxLen* b)
! 3839: {
! 3840: return (a->min == b->min && a->max == b->max) ? 1 : 0;
! 3841: }
! 3842:
! 3843:
! 3844: static void
! 3845: set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
! 3846: {
! 3847: mml->min = min;
! 3848: mml->max = max;
! 3849: }
! 3850:
! 3851: static void
! 3852: clear_mml(MinMaxLen* mml)
! 3853: {
! 3854: mml->min = mml->max = 0;
! 3855: }
! 3856:
! 3857: static void
! 3858: copy_mml(MinMaxLen* to, MinMaxLen* from)
! 3859: {
! 3860: to->min = from->min;
! 3861: to->max = from->max;
! 3862: }
! 3863:
! 3864: static void
! 3865: add_mml(MinMaxLen* to, MinMaxLen* from)
! 3866: {
! 3867: to->min = distance_add(to->min, from->min);
! 3868: to->max = distance_add(to->max, from->max);
! 3869: }
! 3870:
! 3871: #if 0
! 3872: static void
! 3873: add_len_mml(MinMaxLen* to, OnigDistance len)
! 3874: {
! 3875: to->min = distance_add(to->min, len);
! 3876: to->max = distance_add(to->max, len);
! 3877: }
! 3878: #endif
! 3879:
! 3880: static void
! 3881: alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
! 3882: {
! 3883: if (to->min > from->min) to->min = from->min;
! 3884: if (to->max < from->max) to->max = from->max;
! 3885: }
! 3886:
! 3887: static void
! 3888: copy_opt_env(OptEnv* to, OptEnv* from)
! 3889: {
! 3890: *to = *from;
! 3891: }
! 3892:
! 3893: static void
! 3894: clear_opt_anc_info(OptAncInfo* anc)
! 3895: {
! 3896: anc->left_anchor = 0;
! 3897: anc->right_anchor = 0;
! 3898: }
! 3899:
! 3900: static void
! 3901: copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
! 3902: {
! 3903: *to = *from;
! 3904: }
! 3905:
! 3906: static void
! 3907: concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
! 3908: OnigDistance left_len, OnigDistance right_len)
! 3909: {
! 3910: clear_opt_anc_info(to);
! 3911:
! 3912: to->left_anchor = left->left_anchor;
! 3913: if (left_len == 0) {
! 3914: to->left_anchor |= right->left_anchor;
! 3915: }
! 3916:
! 3917: to->right_anchor = right->right_anchor;
! 3918: if (right_len == 0) {
! 3919: to->right_anchor |= left->right_anchor;
! 3920: }
! 3921: }
! 3922:
! 3923: static int
! 3924: is_left_anchor(int anc)
! 3925: {
! 3926: if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
! 3927: anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
! 3928: anc == ANCHOR_PREC_READ_NOT)
! 3929: return 0;
! 3930:
! 3931: return 1;
! 3932: }
! 3933:
! 3934: static int
! 3935: is_set_opt_anc_info(OptAncInfo* to, int anc)
! 3936: {
! 3937: if ((to->left_anchor & anc) != 0) return 1;
! 3938:
! 3939: return ((to->right_anchor & anc) != 0 ? 1 : 0);
! 3940: }
! 3941:
! 3942: static void
! 3943: add_opt_anc_info(OptAncInfo* to, int anc)
! 3944: {
! 3945: if (is_left_anchor(anc))
! 3946: to->left_anchor |= anc;
! 3947: else
! 3948: to->right_anchor |= anc;
! 3949: }
! 3950:
! 3951: static void
! 3952: remove_opt_anc_info(OptAncInfo* to, int anc)
! 3953: {
! 3954: if (is_left_anchor(anc))
! 3955: to->left_anchor &= ~anc;
! 3956: else
! 3957: to->right_anchor &= ~anc;
! 3958: }
! 3959:
! 3960: static void
! 3961: alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
! 3962: {
! 3963: to->left_anchor &= add->left_anchor;
! 3964: to->right_anchor &= add->right_anchor;
! 3965: }
! 3966:
! 3967: static int
! 3968: is_full_opt_exact_info(OptExactInfo* ex)
! 3969: {
! 3970: return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
! 3971: }
! 3972:
! 3973: static void
! 3974: clear_opt_exact_info(OptExactInfo* ex)
! 3975: {
! 3976: clear_mml(&ex->mmd);
! 3977: clear_opt_anc_info(&ex->anc);
! 3978: ex->reach_end = 0;
! 3979: ex->ignore_case = 0;
! 3980: ex->len = 0;
! 3981: ex->s[0] = '\0';
! 3982: }
! 3983:
! 3984: static void
! 3985: copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
! 3986: {
! 3987: *to = *from;
! 3988: }
! 3989:
! 3990: static void
! 3991: concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
! 3992: {
! 3993: int i, j, len;
! 3994: UChar *p, *end;
! 3995: OptAncInfo tanc;
! 3996:
! 3997: if (! to->ignore_case && add->ignore_case) {
! 3998: if (to->len >= add->len) return ; /* avoid */
! 3999:
! 4000: to->ignore_case = 1;
! 4001: }
! 4002:
! 4003: p = add->s;
! 4004: end = p + add->len;
! 4005: for (i = to->len; p < end; ) {
! 4006: len = enc_len(enc, p);
! 4007: if (i + len > OPT_EXACT_MAXLEN) break;
! 4008: for (j = 0; j < len && p < end; j++)
! 4009: to->s[i++] = *p++;
! 4010: }
! 4011:
! 4012: to->len = i;
! 4013: to->reach_end = (p == end ? add->reach_end : 0);
! 4014:
! 4015: concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
! 4016: if (! to->reach_end) tanc.right_anchor = 0;
! 4017: copy_opt_anc_info(&to->anc, &tanc);
! 4018: }
! 4019:
! 4020: static void
! 4021: concat_opt_exact_info_str(OptExactInfo* to,
! 4022: UChar* s, UChar* end, int raw, OnigEncoding enc)
! 4023: {
! 4024: int i, j, len;
! 4025: UChar *p;
! 4026:
! 4027: for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
! 4028: len = enc_len(enc, p);
! 4029: if (i + len > OPT_EXACT_MAXLEN) break;
! 4030: for (j = 0; j < len && p < end; j++)
! 4031: to->s[i++] = *p++;
! 4032: }
! 4033:
! 4034: to->len = i;
! 4035: }
! 4036:
! 4037: static void
! 4038: alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
! 4039: {
! 4040: int i, j, len;
! 4041:
! 4042: if (add->len == 0 || to->len == 0) {
! 4043: clear_opt_exact_info(to);
! 4044: return ;
! 4045: }
! 4046:
! 4047: if (! is_equal_mml(&to->mmd, &add->mmd)) {
! 4048: clear_opt_exact_info(to);
! 4049: return ;
! 4050: }
! 4051:
! 4052: for (i = 0; i < to->len && i < add->len; ) {
! 4053: if (to->s[i] != add->s[i]) break;
! 4054: len = enc_len(env->enc, to->s + i);
! 4055:
! 4056: for (j = 1; j < len; j++) {
! 4057: if (to->s[i+j] != add->s[i+j]) break;
! 4058: }
! 4059: if (j < len) break;
! 4060: i += len;
! 4061: }
! 4062:
! 4063: if (! add->reach_end || i < add->len || i < to->len) {
! 4064: to->reach_end = 0;
! 4065: }
! 4066: to->len = i;
! 4067: to->ignore_case |= add->ignore_case;
! 4068:
! 4069: alt_merge_opt_anc_info(&to->anc, &add->anc);
! 4070: if (! to->reach_end) to->anc.right_anchor = 0;
! 4071: }
! 4072:
! 4073: static void
! 4074: select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
! 4075: {
! 4076: int v1, v2;
! 4077:
! 4078: v1 = now->len;
! 4079: v2 = alt->len;
! 4080:
! 4081: if (v2 == 0) {
! 4082: return ;
! 4083: }
! 4084: else if (v1 == 0) {
! 4085: copy_opt_exact_info(now, alt);
! 4086: return ;
! 4087: }
! 4088: else if (v1 <= 2 && v2 <= 2) {
! 4089: /* ByteValTable[x] is big value --> low price */
! 4090: v2 = map_position_value(enc, now->s[0]);
! 4091: v1 = map_position_value(enc, alt->s[0]);
! 4092:
! 4093: if (now->len > 1) v1 += 5;
! 4094: if (alt->len > 1) v2 += 5;
! 4095: }
! 4096:
! 4097: if (now->ignore_case == 0) v1 *= 2;
! 4098: if (alt->ignore_case == 0) v2 *= 2;
! 4099:
! 4100: if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
! 4101: copy_opt_exact_info(now, alt);
! 4102: }
! 4103:
! 4104: static void
! 4105: clear_opt_map_info(OptMapInfo* map)
! 4106: {
! 4107: static const OptMapInfo clean_info = {
! 4108: {0, 0}, {0, 0}, 0,
! 4109: {
! 4110: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4111: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4112: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4113: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4114: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4115: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4116: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4117: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4118: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4119: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4120: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4121: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4122: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4123: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4124: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 4125: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 4126: }
! 4127: };
! 4128:
! 4129: xmemcpy(map, &clean_info, sizeof(OptMapInfo));
! 4130: }
! 4131:
! 4132: static void
! 4133: copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
! 4134: {
! 4135: *to = *from;
! 4136: }
! 4137:
! 4138: static void
! 4139: add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
! 4140: {
! 4141: if (map->map[c] == 0) {
! 4142: map->map[c] = 1;
! 4143: map->value += map_position_value(enc, c);
! 4144: }
! 4145: }
! 4146:
! 4147: static int
! 4148: add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
! 4149: OnigEncoding enc, OnigAmbigType ambig_flag)
! 4150: {
! 4151: int i, n, len;
! 4152: UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
! 4153: OnigCodePoint code;
! 4154: const OnigPairAmbigCodes* pccs;
! 4155: OnigAmbigType amb;
! 4156:
! 4157: add_char_opt_map_info(map, p[0], enc);
! 4158: code = ONIGENC_MBC_TO_CODE(enc, p, end);
! 4159:
! 4160: for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
! 4161: if ((amb & ambig_flag) == 0) continue;
! 4162:
! 4163: n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs);
! 4164: for (i = 0; i < n; i++) {
! 4165: if (pccs[i].from == code) {
! 4166: len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
! 4167: if (len < 0) return len;
! 4168: add_char_opt_map_info(map, buf[0], enc);
! 4169: }
! 4170: }
! 4171: }
! 4172: return 0;
! 4173: }
! 4174:
! 4175: static void
! 4176: select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
! 4177: {
! 4178: static int z = 1<<15; /* 32768: something big value */
! 4179:
! 4180: int v1, v2;
! 4181:
! 4182: if (alt->value == 0) return ;
! 4183: if (now->value == 0) {
! 4184: copy_opt_map_info(now, alt);
! 4185: return ;
! 4186: }
! 4187:
! 4188: v1 = z / now->value;
! 4189: v2 = z / alt->value;
! 4190: if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
! 4191: copy_opt_map_info(now, alt);
! 4192: }
! 4193:
! 4194: static int
! 4195: comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
! 4196: {
! 4197: #define COMP_EM_BASE 20
! 4198: int ve, vm;
! 4199:
! 4200: if (m->value <= 0) return -1;
! 4201:
! 4202: ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
! 4203: vm = COMP_EM_BASE * 5 * 2 / m->value;
! 4204: return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
! 4205: }
! 4206:
! 4207: static void
! 4208: alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
! 4209: {
! 4210: int i, val;
! 4211:
! 4212: /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
! 4213: if (to->value == 0) return ;
! 4214: if (add->value == 0 || to->mmd.max < add->mmd.min) {
! 4215: clear_opt_map_info(to);
! 4216: return ;
! 4217: }
! 4218:
! 4219: alt_merge_mml(&to->mmd, &add->mmd);
! 4220:
! 4221: val = 0;
! 4222: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
! 4223: if (add->map[i])
! 4224: to->map[i] = 1;
! 4225:
! 4226: if (to->map[i])
! 4227: val += map_position_value(enc, i);
! 4228: }
! 4229: to->value = val;
! 4230:
! 4231: alt_merge_opt_anc_info(&to->anc, &add->anc);
! 4232: }
! 4233:
! 4234: static void
! 4235: set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
! 4236: {
! 4237: copy_mml(&(opt->exb.mmd), mmd);
! 4238: copy_mml(&(opt->expr.mmd), mmd);
! 4239: copy_mml(&(opt->map.mmd), mmd);
! 4240: }
! 4241:
! 4242: static void
! 4243: clear_node_opt_info(NodeOptInfo* opt)
! 4244: {
! 4245: clear_mml(&opt->len);
! 4246: clear_opt_anc_info(&opt->anc);
! 4247: clear_opt_exact_info(&opt->exb);
! 4248: clear_opt_exact_info(&opt->exm);
! 4249: clear_opt_exact_info(&opt->expr);
! 4250: clear_opt_map_info(&opt->map);
! 4251: }
! 4252:
! 4253: static void
! 4254: copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
! 4255: {
! 4256: *to = *from;
! 4257: }
! 4258:
! 4259: static void
! 4260: concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
! 4261: {
! 4262: int exb_reach, exm_reach;
! 4263: OptAncInfo tanc;
! 4264:
! 4265: concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
! 4266: copy_opt_anc_info(&to->anc, &tanc);
! 4267:
! 4268: if (add->exb.len > 0 && to->len.max == 0) {
! 4269: concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
! 4270: to->len.max, add->len.max);
! 4271: copy_opt_anc_info(&add->exb.anc, &tanc);
! 4272: }
! 4273:
! 4274: if (add->map.value > 0 && to->len.max == 0) {
! 4275: if (add->map.mmd.max == 0)
! 4276: add->map.anc.left_anchor |= to->anc.left_anchor;
! 4277: }
! 4278:
! 4279: exb_reach = to->exb.reach_end;
! 4280: exm_reach = to->exm.reach_end;
! 4281:
! 4282: if (add->len.max != 0)
! 4283: to->exb.reach_end = to->exm.reach_end = 0;
! 4284:
! 4285: if (add->exb.len > 0) {
! 4286: if (exb_reach) {
! 4287: concat_opt_exact_info(&to->exb, &add->exb, enc);
! 4288: clear_opt_exact_info(&add->exb);
! 4289: }
! 4290: else if (exm_reach) {
! 4291: concat_opt_exact_info(&to->exm, &add->exb, enc);
! 4292: clear_opt_exact_info(&add->exb);
! 4293: }
! 4294: }
! 4295: select_opt_exact_info(enc, &to->exm, &add->exb);
! 4296: select_opt_exact_info(enc, &to->exm, &add->exm);
! 4297:
! 4298: if (to->expr.len > 0) {
! 4299: if (add->len.max > 0) {
! 4300: if (to->expr.len > (int )add->len.max)
! 4301: to->expr.len = add->len.max;
! 4302:
! 4303: if (to->expr.mmd.max == 0)
! 4304: select_opt_exact_info(enc, &to->exb, &to->expr);
! 4305: else
! 4306: select_opt_exact_info(enc, &to->exm, &to->expr);
! 4307: }
! 4308: }
! 4309: else if (add->expr.len > 0) {
! 4310: copy_opt_exact_info(&to->expr, &add->expr);
! 4311: }
! 4312:
! 4313: select_opt_map_info(&to->map, &add->map);
! 4314:
! 4315: add_mml(&to->len, &add->len);
! 4316: }
! 4317:
! 4318: static void
! 4319: alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
! 4320: {
! 4321: alt_merge_opt_anc_info (&to->anc, &add->anc);
! 4322: alt_merge_opt_exact_info(&to->exb, &add->exb, env);
! 4323: alt_merge_opt_exact_info(&to->exm, &add->exm, env);
! 4324: alt_merge_opt_exact_info(&to->expr, &add->expr, env);
! 4325: alt_merge_opt_map_info(env->enc, &to->map, &add->map);
! 4326:
! 4327: alt_merge_mml(&to->len, &add->len);
! 4328: }
! 4329:
! 4330:
! 4331: #define MAX_NODE_OPT_INFO_REF_COUNT 5
! 4332:
! 4333: static int
! 4334: optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
! 4335: {
! 4336: int type;
! 4337: int r = 0;
! 4338:
! 4339: clear_node_opt_info(opt);
! 4340: set_bound_node_opt_info(opt, &env->mmd);
! 4341:
! 4342: type = NTYPE(node);
! 4343: switch (type) {
! 4344: case N_LIST:
! 4345: {
! 4346: OptEnv nenv;
! 4347: NodeOptInfo nopt;
! 4348: Node* nd = node;
! 4349:
! 4350: copy_opt_env(&nenv, env);
! 4351: do {
! 4352: r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
! 4353: if (r == 0) {
! 4354: add_mml(&nenv.mmd, &nopt.len);
! 4355: concat_left_node_opt_info(env->enc, opt, &nopt);
! 4356: }
! 4357: } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
! 4358: }
! 4359: break;
! 4360:
! 4361: case N_ALT:
! 4362: {
! 4363: NodeOptInfo nopt;
! 4364: Node* nd = node;
! 4365:
! 4366: do {
! 4367: r = optimize_node_left(NCONS(nd).left, &nopt, env);
! 4368: if (r == 0) {
! 4369: if (nd == node) copy_node_opt_info(opt, &nopt);
! 4370: else alt_merge_node_opt_info(opt, &nopt, env);
! 4371: }
! 4372: } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
! 4373: }
! 4374: break;
! 4375:
! 4376: case N_STRING:
! 4377: {
! 4378: StrNode* sn = &(NSTRING(node));
! 4379: int slen = sn->end - sn->s;
! 4380: int is_raw = NSTRING_IS_RAW(node);
! 4381:
! 4382: if (! NSTRING_IS_AMBIG(node)) {
! 4383: concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
! 4384: NSTRING_IS_RAW(node), env->enc);
! 4385: if (slen > 0) {
! 4386: add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
! 4387: }
! 4388: set_mml(&opt->len, slen, slen);
! 4389: }
! 4390: else {
! 4391: int n, max;
! 4392:
! 4393: concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
! 4394: is_raw, env->enc);
! 4395: opt->exb.ignore_case = 1;
! 4396:
! 4397: if (slen > 0) {
! 4398: r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
! 4399: env->enc, env->ambig_flag);
! 4400: if (r != 0) break;
! 4401: }
! 4402:
! 4403: if (NSTRING_IS_AMBIG_REDUCE(node)) {
! 4404: n = onigenc_strlen(env->enc, sn->s, sn->end);
! 4405: max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
! 4406: }
! 4407: else {
! 4408: max = slen;
! 4409: }
! 4410: set_mml(&opt->len, slen, max);
! 4411: }
! 4412:
! 4413: if (opt->exb.len == slen)
! 4414: opt->exb.reach_end = 1;
! 4415: }
! 4416: break;
! 4417:
! 4418: case N_CCLASS:
! 4419: {
! 4420: int i, z;
! 4421: CClassNode* cc = &(NCCLASS(node));
! 4422:
! 4423: /* no need to check ignore case. (setted in setup_tree()) */
! 4424:
! 4425: if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
! 4426: OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
! 4427: OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
! 4428:
! 4429: set_mml(&opt->len, min, max);
! 4430: }
! 4431: else {
! 4432: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 4433: z = BITSET_AT(cc->bs, i);
! 4434: if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
! 4435: add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
! 4436: }
! 4437: }
! 4438: set_mml(&opt->len, 1, 1);
! 4439: }
! 4440: }
! 4441: break;
! 4442:
! 4443: case N_CTYPE:
! 4444: {
! 4445: int i, min, max;
! 4446:
! 4447: max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
! 4448:
! 4449: if (max == 1) {
! 4450: min = 1;
! 4451:
! 4452: switch (NCTYPE(node).type) {
! 4453: case CTYPE_NOT_WORD:
! 4454: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 4455: if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
! 4456: add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
! 4457: }
! 4458: }
! 4459: break;
! 4460:
! 4461: case CTYPE_WORD:
! 4462: for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
! 4463: if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
! 4464: add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
! 4465: }
! 4466: }
! 4467: break;
! 4468: }
! 4469: }
! 4470: else {
! 4471: min = ONIGENC_MBC_MINLEN(env->enc);
! 4472: }
! 4473: set_mml(&opt->len, min, max);
! 4474: }
! 4475: break;
! 4476:
! 4477: case N_ANYCHAR:
! 4478: {
! 4479: OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
! 4480: OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
! 4481: set_mml(&opt->len, min, max);
! 4482: }
! 4483: break;
! 4484:
! 4485: case N_ANCHOR:
! 4486: switch (NANCHOR(node).type) {
! 4487: case ANCHOR_BEGIN_BUF:
! 4488: case ANCHOR_BEGIN_POSITION:
! 4489: case ANCHOR_BEGIN_LINE:
! 4490: case ANCHOR_END_BUF:
! 4491: case ANCHOR_SEMI_END_BUF:
! 4492: case ANCHOR_END_LINE:
! 4493: add_opt_anc_info(&opt->anc, NANCHOR(node).type);
! 4494: break;
! 4495:
! 4496: case ANCHOR_PREC_READ:
! 4497: {
! 4498: NodeOptInfo nopt;
! 4499:
! 4500: r = optimize_node_left(NANCHOR(node).target, &nopt, env);
! 4501: if (r == 0) {
! 4502: if (nopt.exb.len > 0)
! 4503: copy_opt_exact_info(&opt->expr, &nopt.exb);
! 4504: else if (nopt.exm.len > 0)
! 4505: copy_opt_exact_info(&opt->expr, &nopt.exm);
! 4506:
! 4507: opt->expr.reach_end = 0;
! 4508:
! 4509: if (nopt.map.value > 0)
! 4510: copy_opt_map_info(&opt->map, &nopt.map);
! 4511: }
! 4512: }
! 4513: break;
! 4514:
! 4515: case ANCHOR_PREC_READ_NOT:
! 4516: case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
! 4517: case ANCHOR_LOOK_BEHIND_NOT:
! 4518: break;
! 4519: }
! 4520: break;
! 4521:
! 4522: case N_BACKREF:
! 4523: {
! 4524: int i;
! 4525: int* backs;
! 4526: OnigDistance min, max, tmin, tmax;
! 4527: Node** nodes = SCANENV_MEM_NODES(env->scan_env);
! 4528: BackrefNode* br = &(NBACKREF(node));
! 4529:
! 4530: if (br->state & NST_RECURSION) {
! 4531: set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
! 4532: break;
! 4533: }
! 4534: backs = BACKREFS_P(br);
! 4535: r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
! 4536: if (r != 0) break;
! 4537: r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
! 4538: if (r != 0) break;
! 4539: for (i = 1; i < br->back_num; i++) {
! 4540: r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
! 4541: if (r != 0) break;
! 4542: r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
! 4543: if (r != 0) break;
! 4544: if (min > tmin) min = tmin;
! 4545: if (max < tmax) max = tmax;
! 4546: }
! 4547: if (r == 0) set_mml(&opt->len, min, max);
! 4548: }
! 4549: break;
! 4550:
! 4551: #ifdef USE_SUBEXP_CALL
! 4552: case N_CALL:
! 4553: if (IS_CALL_RECURSION(&(NCALL(node))))
! 4554: set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
! 4555: else {
! 4556: OnigOptionType save = env->options;
! 4557: env->options = NEFFECT(NCALL(node).target).option;
! 4558: r = optimize_node_left(NCALL(node).target, opt, env);
! 4559: env->options = save;
! 4560: }
! 4561: break;
! 4562: #endif
! 4563:
! 4564: case N_QUANTIFIER:
! 4565: {
! 4566: int i;
! 4567: OnigDistance min, max;
! 4568: NodeOptInfo nopt;
! 4569: QuantifierNode* qn = &(NQUANTIFIER(node));
! 4570:
! 4571: r = optimize_node_left(qn->target, &nopt, env);
! 4572: if (r) break;
! 4573:
! 4574: if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
! 4575: if (env->mmd.max == 0 &&
! 4576: NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
! 4577: if (IS_MULTILINE(env->options))
! 4578: add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
! 4579: else
! 4580: add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
! 4581: }
! 4582: }
! 4583: else {
! 4584: if (qn->lower > 0) {
! 4585: copy_node_opt_info(opt, &nopt);
! 4586: if (nopt.exb.len > 0) {
! 4587: if (nopt.exb.reach_end) {
! 4588: for (i = 2; i < qn->lower &&
! 4589: ! is_full_opt_exact_info(&opt->exb); i++) {
! 4590: concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
! 4591: }
! 4592: if (i < qn->lower) {
! 4593: opt->exb.reach_end = 0;
! 4594: }
! 4595: }
! 4596: }
! 4597:
! 4598: if (qn->lower != qn->upper) {
! 4599: opt->exb.reach_end = 0;
! 4600: opt->exm.reach_end = 0;
! 4601: }
! 4602: if (qn->lower > 1)
! 4603: opt->exm.reach_end = 0;
! 4604: }
! 4605: }
! 4606:
! 4607: min = distance_multiply(nopt.len.min, qn->lower);
! 4608: if (IS_REPEAT_INFINITE(qn->upper))
! 4609: max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
! 4610: else
! 4611: max = distance_multiply(nopt.len.max, qn->upper);
! 4612:
! 4613: set_mml(&opt->len, min, max);
! 4614: }
! 4615: break;
! 4616:
! 4617: case N_EFFECT:
! 4618: {
! 4619: EffectNode* en = &(NEFFECT(node));
! 4620:
! 4621: switch (en->type) {
! 4622: case EFFECT_OPTION:
! 4623: {
! 4624: OnigOptionType save = env->options;
! 4625:
! 4626: env->options = en->option;
! 4627: r = optimize_node_left(en->target, opt, env);
! 4628: env->options = save;
! 4629: }
! 4630: break;
! 4631:
! 4632: case EFFECT_MEMORY:
! 4633: #ifdef USE_SUBEXP_CALL
! 4634: en->opt_count++;
! 4635: if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
! 4636: OnigDistance min, max;
! 4637:
! 4638: min = 0;
! 4639: max = ONIG_INFINITE_DISTANCE;
! 4640: if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
! 4641: if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
! 4642: set_mml(&opt->len, min, max);
! 4643: }
! 4644: else
! 4645: #endif
! 4646: {
! 4647: r = optimize_node_left(en->target, opt, env);
! 4648:
! 4649: if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
! 4650: if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
! 4651: remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
! 4652: }
! 4653: }
! 4654: break;
! 4655:
! 4656: case EFFECT_STOP_BACKTRACK:
! 4657: r = optimize_node_left(en->target, opt, env);
! 4658: break;
! 4659: }
! 4660: }
! 4661: break;
! 4662:
! 4663: default:
! 4664: #ifdef ONIG_DEBUG
! 4665: fprintf(stderr, "optimize_node_left: undefined node type %d\n",
! 4666: NTYPE(node));
! 4667: #endif
! 4668: r = ONIGERR_TYPE_BUG;
! 4669: break;
! 4670: }
! 4671:
! 4672: return r;
! 4673: }
! 4674:
! 4675: static int
! 4676: set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
! 4677: {
! 4678: int r;
! 4679:
! 4680: if (e->len == 0) return 0;
! 4681:
! 4682: if (e->ignore_case) {
! 4683: reg->exact = (UChar* )xmalloc(e->len);
! 4684: CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
! 4685: xmemcpy(reg->exact, e->s, e->len);
! 4686: reg->exact_end = reg->exact + e->len;
! 4687: reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
! 4688: }
! 4689: else {
! 4690: int allow_reverse;
! 4691:
! 4692: reg->exact = k_strdup(e->s, e->s + e->len);
! 4693: CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
! 4694: reg->exact_end = reg->exact + e->len;
! 4695:
! 4696: allow_reverse =
! 4697: ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
! 4698:
! 4699: if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
! 4700: r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
! 4701: reg->map, &(reg->int_map));
! 4702: if (r) return r;
! 4703:
! 4704: reg->optimize = (allow_reverse != 0
! 4705: ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
! 4706: }
! 4707: else {
! 4708: reg->optimize = ONIG_OPTIMIZE_EXACT;
! 4709: }
! 4710: }
! 4711:
! 4712: reg->dmin = e->mmd.min;
! 4713: reg->dmax = e->mmd.max;
! 4714:
! 4715: if (reg->dmin != ONIG_INFINITE_DISTANCE) {
! 4716: reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
! 4717: }
! 4718:
! 4719: return 0;
! 4720: }
! 4721:
! 4722: static void
! 4723: set_optimize_map_info(regex_t* reg, OptMapInfo* m)
! 4724: {
! 4725: int i;
! 4726:
! 4727: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
! 4728: reg->map[i] = m->map[i];
! 4729:
! 4730: reg->optimize = ONIG_OPTIMIZE_MAP;
! 4731: reg->dmin = m->mmd.min;
! 4732: reg->dmax = m->mmd.max;
! 4733:
! 4734: if (reg->dmin != ONIG_INFINITE_DISTANCE) {
! 4735: reg->threshold_len = reg->dmin + 1;
! 4736: }
! 4737: }
! 4738:
! 4739: static void
! 4740: set_sub_anchor(regex_t* reg, OptAncInfo* anc)
! 4741: {
! 4742: reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
! 4743: reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
! 4744: }
! 4745:
! 4746: #ifdef ONIG_DEBUG
! 4747: static void print_optimize_info(FILE* f, regex_t* reg);
! 4748: #endif
! 4749:
! 4750: static int
! 4751: set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
! 4752: {
! 4753:
! 4754: int r;
! 4755: NodeOptInfo opt;
! 4756: OptEnv env;
! 4757:
! 4758: env.enc = reg->enc;
! 4759: env.options = reg->options;
! 4760: env.ambig_flag = reg->ambig_flag;
! 4761: env.scan_env = scan_env;
! 4762: clear_mml(&env.mmd);
! 4763:
! 4764: r = optimize_node_left(node, &opt, &env);
! 4765: if (r) return r;
! 4766:
! 4767: reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
! 4768: ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
! 4769:
! 4770: reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
! 4771:
! 4772: if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
! 4773: reg->anchor_dmin = opt.len.min;
! 4774: reg->anchor_dmax = opt.len.max;
! 4775: }
! 4776:
! 4777: if (opt.exb.len > 0 || opt.exm.len > 0) {
! 4778: select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
! 4779: if (opt.map.value > 0 &&
! 4780: comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
! 4781: goto set_map;
! 4782: }
! 4783: else {
! 4784: r = set_optimize_exact_info(reg, &opt.exb);
! 4785: set_sub_anchor(reg, &opt.exb.anc);
! 4786: }
! 4787: }
! 4788: else if (opt.map.value > 0) {
! 4789: set_map:
! 4790: set_optimize_map_info(reg, &opt.map);
! 4791: set_sub_anchor(reg, &opt.map.anc);
! 4792: }
! 4793: else {
! 4794: reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
! 4795: if (opt.len.max == 0)
! 4796: reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
! 4797: }
! 4798:
! 4799: #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
! 4800: print_optimize_info(stderr, reg);
! 4801: #endif
! 4802: return r;
! 4803: }
! 4804:
! 4805: static void
! 4806: clear_optimize_info(regex_t* reg)
! 4807: {
! 4808: reg->optimize = ONIG_OPTIMIZE_NONE;
! 4809: reg->anchor = 0;
! 4810: reg->anchor_dmin = 0;
! 4811: reg->anchor_dmax = 0;
! 4812: reg->sub_anchor = 0;
! 4813: reg->exact_end = (UChar* )NULL;
! 4814: reg->threshold_len = 0;
! 4815: if (IS_NOT_NULL(reg->exact)) {
! 4816: xfree(reg->exact);
! 4817: reg->exact = (UChar* )NULL;
! 4818: }
! 4819: }
! 4820:
! 4821: #ifdef ONIG_DEBUG
! 4822:
! 4823: static void print_enc_string(FILE* fp, OnigEncoding enc,
! 4824: const UChar *s, const UChar *end)
! 4825: {
! 4826: fprintf(fp, "\nPATTERN: /");
! 4827:
! 4828: if (ONIGENC_MBC_MINLEN(enc) > 1) {
! 4829: const UChar *p;
! 4830: OnigCodePoint code;
! 4831:
! 4832: p = s;
! 4833: while (p < end) {
! 4834: code = ONIGENC_MBC_TO_CODE(enc, p, end);
! 4835: if (code >= 0x80) {
! 4836: fprintf(fp, " 0x%04x ", (int )code);
! 4837: }
! 4838: else {
! 4839: fputc((int )code, fp);
! 4840: }
! 4841:
! 4842: p += enc_len(enc, p);
! 4843: }
! 4844: }
! 4845: else {
! 4846: while (s < end) {
! 4847: fputc((int )*s, fp);
! 4848: s++;
! 4849: }
! 4850: }
! 4851:
! 4852: fprintf(fp, "/\n");
! 4853: }
! 4854:
! 4855: static void
! 4856: print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
! 4857: {
! 4858: if (a == ONIG_INFINITE_DISTANCE)
! 4859: fputs("inf", f);
! 4860: else
! 4861: fprintf(f, "(%u)", a);
! 4862:
! 4863: fputs("-", f);
! 4864:
! 4865: if (b == ONIG_INFINITE_DISTANCE)
! 4866: fputs("inf", f);
! 4867: else
! 4868: fprintf(f, "(%u)", b);
! 4869: }
! 4870:
! 4871: static void
! 4872: print_anchor(FILE* f, int anchor)
! 4873: {
! 4874: int q = 0;
! 4875:
! 4876: fprintf(f, "[");
! 4877:
! 4878: if (anchor & ANCHOR_BEGIN_BUF) {
! 4879: fprintf(f, "begin-buf");
! 4880: q = 1;
! 4881: }
! 4882: if (anchor & ANCHOR_BEGIN_LINE) {
! 4883: if (q) fprintf(f, ", ");
! 4884: q = 1;
! 4885: fprintf(f, "begin-line");
! 4886: }
! 4887: if (anchor & ANCHOR_BEGIN_POSITION) {
! 4888: if (q) fprintf(f, ", ");
! 4889: q = 1;
! 4890: fprintf(f, "begin-pos");
! 4891: }
! 4892: if (anchor & ANCHOR_END_BUF) {
! 4893: if (q) fprintf(f, ", ");
! 4894: q = 1;
! 4895: fprintf(f, "end-buf");
! 4896: }
! 4897: if (anchor & ANCHOR_SEMI_END_BUF) {
! 4898: if (q) fprintf(f, ", ");
! 4899: q = 1;
! 4900: fprintf(f, "semi-end-buf");
! 4901: }
! 4902: if (anchor & ANCHOR_END_LINE) {
! 4903: if (q) fprintf(f, ", ");
! 4904: q = 1;
! 4905: fprintf(f, "end-line");
! 4906: }
! 4907: if (anchor & ANCHOR_ANYCHAR_STAR) {
! 4908: if (q) fprintf(f, ", ");
! 4909: q = 1;
! 4910: fprintf(f, "anychar-star");
! 4911: }
! 4912: if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
! 4913: if (q) fprintf(f, ", ");
! 4914: fprintf(f, "anychar-star-pl");
! 4915: }
! 4916:
! 4917: fprintf(f, "]");
! 4918: }
! 4919:
! 4920: static void
! 4921: print_optimize_info(FILE* f, regex_t* reg)
! 4922: {
! 4923: static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
! 4924: "EXACT_IC", "MAP" };
! 4925:
! 4926: fprintf(f, "optimize: %s\n", on[reg->optimize]);
! 4927: fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
! 4928: if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
! 4929: print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
! 4930: fprintf(f, "\n");
! 4931:
! 4932: if (reg->optimize) {
! 4933: fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
! 4934: fprintf(f, "\n");
! 4935: }
! 4936: fprintf(f, "\n");
! 4937:
! 4938: if (reg->exact) {
! 4939: UChar *p;
! 4940: fprintf(f, "exact: [");
! 4941: for (p = reg->exact; p < reg->exact_end; p++) {
! 4942: fputc(*p, f);
! 4943: }
! 4944: fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
! 4945: }
! 4946: else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
! 4947: int c, i, n = 0;
! 4948:
! 4949: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
! 4950: if (reg->map[i]) n++;
! 4951:
! 4952: fprintf(f, "map: n=%d\n", n);
! 4953: if (n > 0) {
! 4954: c = 0;
! 4955: fputc('[', f);
! 4956: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
! 4957: if (reg->map[i] != 0) {
! 4958: if (c > 0) fputs(", ", f);
! 4959: c++;
! 4960: if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
! 4961: ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
! 4962: fputc(i, f);
! 4963: else
! 4964: fprintf(f, "%d", i);
! 4965: }
! 4966: }
! 4967: fprintf(f, "]\n");
! 4968: }
! 4969: }
! 4970: }
! 4971: #endif /* ONIG_DEBUG */
! 4972:
! 4973:
! 4974: static void
! 4975: onig_free_body(regex_t* reg)
! 4976: {
! 4977: if (IS_NOT_NULL(reg->p)) xfree(reg->p);
! 4978: if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
! 4979: if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
! 4980: if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
! 4981: if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
! 4982: if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
! 4983:
! 4984: #ifdef USE_NAMED_GROUP
! 4985: onig_names_free(reg);
! 4986: #endif
! 4987: }
! 4988:
! 4989: extern void
! 4990: onig_free(regex_t* reg)
! 4991: {
! 4992: if (IS_NOT_NULL(reg)) {
! 4993: onig_free_body(reg);
! 4994: xfree(reg);
! 4995: }
! 4996: }
! 4997:
! 4998: #define REGEX_TRANSFER(to,from) do {\
! 4999: (to)->state = ONIG_STATE_MODIFY;\
! 5000: onig_free_body(to);\
! 5001: xmemcpy(to, from, sizeof(regex_t));\
! 5002: xfree(from);\
! 5003: } while (0)
! 5004:
! 5005: extern void
! 5006: onig_transfer(regex_t* to, regex_t* from)
! 5007: {
! 5008: THREAD_ATOMIC_START;
! 5009: REGEX_TRANSFER(to, from);
! 5010: THREAD_ATOMIC_END;
! 5011: }
! 5012:
! 5013: #define REGEX_CHAIN_HEAD(reg) do {\
! 5014: while (IS_NOT_NULL((reg)->chain)) {\
! 5015: (reg) = (reg)->chain;\
! 5016: }\
! 5017: } while (0)
! 5018:
! 5019: extern void
! 5020: onig_chain_link_add(regex_t* to, regex_t* add)
! 5021: {
! 5022: THREAD_ATOMIC_START;
! 5023: REGEX_CHAIN_HEAD(to);
! 5024: to->chain = add;
! 5025: THREAD_ATOMIC_END;
! 5026: }
! 5027:
! 5028: extern void
! 5029: onig_chain_reduce(regex_t* reg)
! 5030: {
! 5031: regex_t *head, *prev;
! 5032:
! 5033: prev = reg;
! 5034: head = prev->chain;
! 5035: if (IS_NOT_NULL(head)) {
! 5036: reg->state = ONIG_STATE_MODIFY;
! 5037: while (IS_NOT_NULL(head->chain)) {
! 5038: prev = head;
! 5039: head = head->chain;
! 5040: }
! 5041: prev->chain = (regex_t* )NULL;
! 5042: REGEX_TRANSFER(reg, head);
! 5043: }
! 5044: }
! 5045:
! 5046: #if 0
! 5047: extern int
! 5048: onig_clone(regex_t** to, regex_t* from)
! 5049: {
! 5050: int r, size;
! 5051: regex_t* reg;
! 5052:
! 5053: #ifdef USE_MULTI_THREAD_SYSTEM
! 5054: if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
! 5055: ONIG_STATE_INC(from);
! 5056: if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
! 5057: onig_chain_reduce(from);
! 5058: ONIG_STATE_INC(from);
! 5059: }
! 5060: }
! 5061: else {
! 5062: int n = 0;
! 5063: while (ONIG_STATE(from) < ONIG_STATE_NORMAL) {
! 5064: if (++n > THREAD_PASS_LIMIT_COUNT)
! 5065: return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
! 5066: THREAD_PASS;
! 5067: }
! 5068: ONIG_STATE_INC(from);
! 5069: }
! 5070: #endif /* USE_MULTI_THREAD_SYSTEM */
! 5071:
! 5072: r = onig_alloc_init(®, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
! 5073: from->enc, ONIG_SYNTAX_DEFAULT);
! 5074: if (r != 0) {
! 5075: ONIG_STATE_DEC(from);
! 5076: return r;
! 5077: }
! 5078:
! 5079: xmemcpy(reg, from, sizeof(onig_t));
! 5080: reg->chain = (regex_t* )NULL;
! 5081: reg->state = ONIG_STATE_NORMAL;
! 5082:
! 5083: if (from->p) {
! 5084: reg->p = (UChar* )xmalloc(reg->alloc);
! 5085: if (IS_NULL(reg->p)) goto mem_error;
! 5086: xmemcpy(reg->p, from->p, reg->alloc);
! 5087: }
! 5088:
! 5089: if (from->exact) {
! 5090: reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
! 5091: if (IS_NULL(reg->exact)) goto mem_error;
! 5092: reg->exact_end = reg->exact + (from->exact_end - from->exact);
! 5093: xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
! 5094: }
! 5095:
! 5096: if (from->int_map) {
! 5097: size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
! 5098: reg->int_map = (int* )xmalloc(size);
! 5099: if (IS_NULL(reg->int_map)) goto mem_error;
! 5100: xmemcpy(reg->int_map, from->int_map, size);
! 5101: }
! 5102:
! 5103: if (from->int_map_backward) {
! 5104: size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
! 5105: reg->int_map_backward = (int* )xmalloc(size);
! 5106: if (IS_NULL(reg->int_map_backward)) goto mem_error;
! 5107: xmemcpy(reg->int_map_backward, from->int_map_backward, size);
! 5108: }
! 5109:
! 5110: #ifdef USE_NAMED_GROUP
! 5111: reg->name_table = names_clone(from); /* names_clone is not implemented */
! 5112: #endif
! 5113:
! 5114: ONIG_STATE_DEC(from);
! 5115: *to = reg;
! 5116: return 0;
! 5117:
! 5118: mem_error:
! 5119: ONIG_STATE_DEC(from);
! 5120: return ONIGERR_MEMORY;
! 5121: }
! 5122: #endif
! 5123:
! 5124: #ifdef ONIG_DEBUG
! 5125: static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
! 5126: #endif
! 5127: #ifdef ONIG_DEBUG_PARSE_TREE
! 5128: static void print_tree P_((FILE* f, Node* node));
! 5129: #endif
! 5130:
! 5131: extern int
! 5132: onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
! 5133: OnigErrorInfo* einfo)
! 5134: {
! 5135: #define COMPILE_INIT_SIZE 20
! 5136:
! 5137: int r, init_size;
! 5138: Node* root;
! 5139: ScanEnv scan_env;
! 5140: #ifdef USE_SUBEXP_CALL
! 5141: UnsetAddrList uslist;
! 5142: #endif
! 5143:
! 5144: reg->state = ONIG_STATE_COMPILING;
! 5145:
! 5146: #ifdef ONIG_DEBUG
! 5147: print_enc_string(stderr, reg->enc, pattern, pattern_end);
! 5148: #endif
! 5149:
! 5150: if (reg->alloc == 0) {
! 5151: init_size = (pattern_end - pattern) * 2;
! 5152: if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
! 5153: r = BBUF_INIT(reg, init_size);
! 5154: if (r != 0) goto end;
! 5155: }
! 5156: else
! 5157: reg->used = 0;
! 5158:
! 5159: reg->num_mem = 0;
! 5160: reg->num_repeat = 0;
! 5161: reg->num_null_check = 0;
! 5162: reg->repeat_range_alloc = 0;
! 5163: reg->repeat_range = (OnigRepeatRange* )NULL;
! 5164: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 5165: reg->num_comb_exp_check = 0;
! 5166: #endif
! 5167:
! 5168: r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
! 5169: if (r != 0) goto err;
! 5170:
! 5171: #ifdef USE_NAMED_GROUP
! 5172: /* mixed use named group and no-named group */
! 5173: if (scan_env.num_named > 0 &&
! 5174: IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
! 5175: !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
! 5176: if (scan_env.num_named != scan_env.num_mem)
! 5177: r = disable_noname_group_capture(&root, reg, &scan_env);
! 5178: else
! 5179: r = numbered_ref_check(root);
! 5180:
! 5181: if (r != 0) goto err;
! 5182: }
! 5183: #endif
! 5184:
! 5185: #ifdef ONIG_DEBUG_PARSE_TREE
! 5186: print_tree(stderr, root);
! 5187: #endif
! 5188:
! 5189: #ifdef USE_SUBEXP_CALL
! 5190: if (scan_env.num_call > 0) {
! 5191: r = unset_addr_list_init(&uslist, scan_env.num_call);
! 5192: if (r != 0) goto err;
! 5193: scan_env.unset_addr_list = &uslist;
! 5194: r = setup_subexp_call(root, &scan_env);
! 5195: if (r != 0) goto err_unset;
! 5196: r = subexp_recursive_check_trav(root, &scan_env);
! 5197: if (r < 0) goto err_unset;
! 5198: r = subexp_inf_recursive_check_trav(root, &scan_env);
! 5199: if (r != 0) goto err_unset;
! 5200:
! 5201: reg->num_call = scan_env.num_call;
! 5202: }
! 5203: else
! 5204: reg->num_call = 0;
! 5205: #endif
! 5206:
! 5207: r = setup_tree(root, reg, 0, &scan_env);
! 5208: if (r != 0) goto err_unset;
! 5209:
! 5210: reg->capture_history = scan_env.capture_history;
! 5211: reg->bt_mem_start = scan_env.bt_mem_start;
! 5212: reg->bt_mem_start |= reg->capture_history;
! 5213: if (IS_FIND_CONDITION(reg->options))
! 5214: BIT_STATUS_ON_ALL(reg->bt_mem_end);
! 5215: else {
! 5216: reg->bt_mem_end = scan_env.bt_mem_end;
! 5217: reg->bt_mem_end |= reg->capture_history;
! 5218: }
! 5219:
! 5220: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 5221: if (scan_env.backrefed_mem == 0
! 5222: #ifdef USE_SUBEXP_CALL
! 5223: || scan_env.num_call == 0
! 5224: #endif
! 5225: ) {
! 5226: setup_comb_exp_check(root, 0, &scan_env);
! 5227: #ifdef USE_SUBEXP_CALL
! 5228: if (scan_env.has_recursion != 0) {
! 5229: scan_env.num_comb_exp_check = 0;
! 5230: }
! 5231: else
! 5232: #endif
! 5233: if (scan_env.comb_exp_max_regnum > 0) {
! 5234: int i;
! 5235: for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
! 5236: if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
! 5237: scan_env.num_comb_exp_check = 0;
! 5238: break;
! 5239: }
! 5240: }
! 5241: }
! 5242: }
! 5243:
! 5244: reg->num_comb_exp_check = scan_env.num_comb_exp_check;
! 5245: #endif
! 5246:
! 5247: clear_optimize_info(reg);
! 5248: #ifndef ONIG_DONT_OPTIMIZE
! 5249: r = set_optimize_info_from_tree(root, reg, &scan_env);
! 5250: if (r != 0) goto err_unset;
! 5251: #endif
! 5252:
! 5253: if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
! 5254: xfree(scan_env.mem_nodes_dynamic);
! 5255: scan_env.mem_nodes_dynamic = (Node** )NULL;
! 5256: }
! 5257:
! 5258: r = compile_tree(root, reg);
! 5259: if (r == 0) {
! 5260: r = add_opcode(reg, OP_END);
! 5261: #ifdef USE_SUBEXP_CALL
! 5262: if (scan_env.num_call > 0) {
! 5263: r = unset_addr_list_fix(&uslist, reg);
! 5264: unset_addr_list_end(&uslist);
! 5265: if (r) goto err;
! 5266: }
! 5267: #endif
! 5268:
! 5269: if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
! 5270: reg->stack_pop_level = STACK_POP_LEVEL_ALL;
! 5271: else {
! 5272: if (reg->bt_mem_start != 0)
! 5273: reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
! 5274: else
! 5275: reg->stack_pop_level = STACK_POP_LEVEL_FREE;
! 5276: }
! 5277: }
! 5278: #ifdef USE_SUBEXP_CALL
! 5279: else if (scan_env.num_call > 0) {
! 5280: unset_addr_list_end(&uslist);
! 5281: }
! 5282: #endif
! 5283: onig_node_free(root);
! 5284:
! 5285: #ifdef ONIG_DEBUG_COMPILE
! 5286: #ifdef USE_NAMED_GROUP
! 5287: onig_print_names(stderr, reg);
! 5288: #endif
! 5289: print_compiled_byte_code_list(stderr, reg);
! 5290: #endif
! 5291:
! 5292: end:
! 5293: reg->state = ONIG_STATE_NORMAL;
! 5294: return r;
! 5295:
! 5296: err_unset:
! 5297: #ifdef USE_SUBEXP_CALL
! 5298: if (scan_env.num_call > 0) {
! 5299: unset_addr_list_end(&uslist);
! 5300: }
! 5301: #endif
! 5302: err:
! 5303: if (IS_NOT_NULL(scan_env.error)) {
! 5304: if (IS_NOT_NULL(einfo)) {
! 5305: einfo->enc = scan_env.enc;
! 5306: einfo->par = scan_env.error;
! 5307: einfo->par_end = scan_env.error_end;
! 5308: }
! 5309: }
! 5310:
! 5311: if (IS_NOT_NULL(root)) onig_node_free(root);
! 5312: if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
! 5313: xfree(scan_env.mem_nodes_dynamic);
! 5314: return r;
! 5315: }
! 5316:
! 5317: #ifdef USE_RECOMPILE_API
! 5318: extern int
! 5319: onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
! 5320: OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
! 5321: OnigErrorInfo* einfo)
! 5322: {
! 5323: int r;
! 5324: regex_t *new_reg;
! 5325:
! 5326: r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
! 5327: if (r) return r;
! 5328: if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
! 5329: onig_transfer(reg, new_reg);
! 5330: }
! 5331: else {
! 5332: onig_chain_link_add(reg, new_reg);
! 5333: }
! 5334: return 0;
! 5335: }
! 5336: #endif
! 5337:
! 5338: static int onig_inited = 0;
! 5339:
! 5340: extern int
! 5341: onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
! 5342: OnigEncoding enc, OnigSyntaxType* syntax)
! 5343: {
! 5344: if (! onig_inited)
! 5345: onig_init();
! 5346:
! 5347: if (ONIGENC_IS_UNDEF(enc))
! 5348: return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
! 5349:
! 5350: if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
! 5351: == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
! 5352: return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
! 5353: }
! 5354:
! 5355: *reg = (regex_t* )xmalloc(sizeof(regex_t));
! 5356: if (IS_NULL(*reg)) return ONIGERR_MEMORY;
! 5357: (*reg)->state = ONIG_STATE_MODIFY;
! 5358:
! 5359: if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
! 5360: option |= syntax->options;
! 5361: option &= ~ONIG_OPTION_SINGLELINE;
! 5362: }
! 5363: else
! 5364: option |= syntax->options;
! 5365:
! 5366: (*reg)->enc = enc;
! 5367: (*reg)->options = option;
! 5368: (*reg)->syntax = syntax;
! 5369: (*reg)->optimize = 0;
! 5370: (*reg)->exact = (UChar* )NULL;
! 5371: (*reg)->int_map = (int* )NULL;
! 5372: (*reg)->int_map_backward = (int* )NULL;
! 5373: (*reg)->chain = (regex_t* )NULL;
! 5374:
! 5375: (*reg)->p = (UChar* )NULL;
! 5376: (*reg)->alloc = 0;
! 5377: (*reg)->used = 0;
! 5378: (*reg)->name_table = (void* )NULL;
! 5379:
! 5380: (*reg)->ambig_flag = ambig_flag;
! 5381: (*reg)->ambig_flag &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
! 5382:
! 5383: return 0;
! 5384: }
! 5385:
! 5386: extern int
! 5387: onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
! 5388: OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
! 5389: OnigErrorInfo* einfo)
! 5390: {
! 5391: int r;
! 5392:
! 5393: if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
! 5394:
! 5395: r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
! 5396: enc, syntax);
! 5397: if (r) return r;
! 5398:
! 5399: r = onig_compile(*reg, pattern, pattern_end, einfo);
! 5400: if (r) {
! 5401: onig_free(*reg);
! 5402: *reg = NULL;
! 5403: }
! 5404: return r;
! 5405: }
! 5406:
! 5407: extern int
! 5408: onig_init(void)
! 5409: {
! 5410: if (onig_inited != 0)
! 5411: return 0;
! 5412:
! 5413: onig_inited = 1;
! 5414:
! 5415: THREAD_SYSTEM_INIT;
! 5416: THREAD_ATOMIC_START;
! 5417:
! 5418: onigenc_init();
! 5419: onigenc_set_default_caseconv_table((UChar* )0);
! 5420:
! 5421: #ifdef ONIG_DEBUG_STATISTICS
! 5422: onig_statistics_init();
! 5423: #endif
! 5424:
! 5425: THREAD_ATOMIC_END;
! 5426: return 0;
! 5427: }
! 5428:
! 5429:
! 5430: extern int
! 5431: onig_end(void)
! 5432: {
! 5433: extern int onig_free_shared_cclass_table(void);
! 5434:
! 5435: THREAD_ATOMIC_START;
! 5436:
! 5437: #ifdef ONIG_DEBUG_STATISTICS
! 5438: onig_print_statistics(stderr);
! 5439: #endif
! 5440:
! 5441: #ifdef USE_SHARED_CCLASS_TABLE
! 5442: onig_free_shared_cclass_table();
! 5443: #endif
! 5444:
! 5445: #ifdef USE_RECYCLE_NODE
! 5446: onig_free_node_list();
! 5447: #endif
! 5448:
! 5449: onig_inited = 0;
! 5450:
! 5451: THREAD_ATOMIC_END;
! 5452: THREAD_SYSTEM_END;
! 5453: return 0;
! 5454: }
! 5455:
! 5456:
! 5457: #ifdef ONIG_DEBUG
! 5458:
! 5459: /* arguments type */
! 5460: #define ARG_SPECIAL -1
! 5461: #define ARG_NON 0
! 5462: #define ARG_RELADDR 1
! 5463: #define ARG_ABSADDR 2
! 5464: #define ARG_LENGTH 3
! 5465: #define ARG_MEMNUM 4
! 5466: #define ARG_OPTION 5
! 5467: #define ARG_STATE_CHECK 6
! 5468:
! 5469: OnigOpInfoType OnigOpInfo[] = {
! 5470: { OP_FINISH, "finish", ARG_NON },
! 5471: { OP_END, "end", ARG_NON },
! 5472: { OP_EXACT1, "exact1", ARG_SPECIAL },
! 5473: { OP_EXACT2, "exact2", ARG_SPECIAL },
! 5474: { OP_EXACT3, "exact3", ARG_SPECIAL },
! 5475: { OP_EXACT4, "exact4", ARG_SPECIAL },
! 5476: { OP_EXACT5, "exact5", ARG_SPECIAL },
! 5477: { OP_EXACTN, "exactn", ARG_SPECIAL },
! 5478: { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
! 5479: { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
! 5480: { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
! 5481: { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
! 5482: { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
! 5483: { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
! 5484: { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
! 5485: { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
! 5486: { OP_CCLASS, "cclass", ARG_SPECIAL },
! 5487: { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
! 5488: { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
! 5489: { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
! 5490: { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
! 5491: { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
! 5492: { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
! 5493: { OP_ANYCHAR, "anychar", ARG_NON },
! 5494: { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
! 5495: { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
! 5496: { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
! 5497: { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
! 5498: { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
! 5499: { OP_WORD, "word", ARG_NON },
! 5500: { OP_NOT_WORD, "not-word", ARG_NON },
! 5501: { OP_WORD_BOUND, "word-bound", ARG_NON },
! 5502: { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
! 5503: { OP_WORD_BEGIN, "word-begin", ARG_NON },
! 5504: { OP_WORD_END, "word-end", ARG_NON },
! 5505: { OP_BEGIN_BUF, "begin-buf", ARG_NON },
! 5506: { OP_END_BUF, "end-buf", ARG_NON },
! 5507: { OP_BEGIN_LINE, "begin-line", ARG_NON },
! 5508: { OP_END_LINE, "end-line", ARG_NON },
! 5509: { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
! 5510: { OP_BEGIN_POSITION, "begin-position", ARG_NON },
! 5511: { OP_BACKREF1, "backref1", ARG_NON },
! 5512: { OP_BACKREF2, "backref2", ARG_NON },
! 5513: { OP_BACKREFN, "backrefn", ARG_MEMNUM },
! 5514: { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
! 5515: { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
! 5516: { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
! 5517: { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
! 5518: { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
! 5519: { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
! 5520: { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
! 5521: { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
! 5522: { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
! 5523: { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
! 5524: { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
! 5525: { OP_SET_OPTION, "set-option", ARG_OPTION },
! 5526: { OP_FAIL, "fail", ARG_NON },
! 5527: { OP_JUMP, "jump", ARG_RELADDR },
! 5528: { OP_PUSH, "push", ARG_RELADDR },
! 5529: { OP_POP, "pop", ARG_NON },
! 5530: { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
! 5531: { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
! 5532: { OP_REPEAT, "repeat", ARG_SPECIAL },
! 5533: { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
! 5534: { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
! 5535: { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
! 5536: { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
! 5537: { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
! 5538: { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
! 5539: { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
! 5540: { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
! 5541: { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
! 5542: { OP_PUSH_POS, "push-pos", ARG_NON },
! 5543: { OP_POP_POS, "pop-pos", ARG_NON },
! 5544: { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
! 5545: { OP_FAIL_POS, "fail-pos", ARG_NON },
! 5546: { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
! 5547: { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
! 5548: { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
! 5549: { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
! 5550: { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
! 5551: { OP_CALL, "call", ARG_ABSADDR },
! 5552: { OP_RETURN, "return", ARG_NON },
! 5553: { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
! 5554: { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
! 5555: { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
! 5556: { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
! 5557: { OP_STATE_CHECK_ANYCHAR_ML_STAR,
! 5558: "state-check-anychar-ml*", ARG_STATE_CHECK },
! 5559: { -1, "", ARG_NON }
! 5560: };
! 5561:
! 5562: static char*
! 5563: op2name(int opcode)
! 5564: {
! 5565: int i;
! 5566:
! 5567: for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
! 5568: if (opcode == OnigOpInfo[i].opcode)
! 5569: return OnigOpInfo[i].name;
! 5570: }
! 5571: return "";
! 5572: }
! 5573:
! 5574: static int
! 5575: op2arg_type(int opcode)
! 5576: {
! 5577: int i;
! 5578:
! 5579: for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
! 5580: if (opcode == OnigOpInfo[i].opcode)
! 5581: return OnigOpInfo[i].arg_type;
! 5582: }
! 5583: return ARG_SPECIAL;
! 5584: }
! 5585:
! 5586: static void
! 5587: Indent(FILE* f, int indent)
! 5588: {
! 5589: int i;
! 5590: for (i = 0; i < indent; i++) putc(' ', f);
! 5591: }
! 5592:
! 5593: static void
! 5594: p_string(FILE* f, int len, UChar* s)
! 5595: {
! 5596: fputs(":", f);
! 5597: while (len-- > 0) { fputc(*s++, f); }
! 5598: }
! 5599:
! 5600: static void
! 5601: p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
! 5602: {
! 5603: int x = len * mb_len;
! 5604:
! 5605: fprintf(f, ":%d:", len);
! 5606: while (x-- > 0) { fputc(*s++, f); }
! 5607: }
! 5608:
! 5609: extern void
! 5610: onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
! 5611: OnigEncoding enc)
! 5612: {
! 5613: int i, n, arg_type;
! 5614: RelAddrType addr;
! 5615: LengthType len;
! 5616: MemNumType mem;
! 5617: StateCheckNumType scn;
! 5618: OnigCodePoint code;
! 5619: UChar *q;
! 5620:
! 5621: fprintf(f, "[%s", op2name(*bp));
! 5622: arg_type = op2arg_type(*bp);
! 5623: if (arg_type != ARG_SPECIAL) {
! 5624: bp++;
! 5625: switch (arg_type) {
! 5626: case ARG_NON:
! 5627: break;
! 5628: case ARG_RELADDR:
! 5629: GET_RELADDR_INC(addr, bp);
! 5630: fprintf(f, ":(%d)", addr);
! 5631: break;
! 5632: case ARG_ABSADDR:
! 5633: GET_ABSADDR_INC(addr, bp);
! 5634: fprintf(f, ":(%d)", addr);
! 5635: break;
! 5636: case ARG_LENGTH:
! 5637: GET_LENGTH_INC(len, bp);
! 5638: fprintf(f, ":%d", len);
! 5639: break;
! 5640: case ARG_MEMNUM:
! 5641: mem = *((MemNumType* )bp);
! 5642: bp += SIZE_MEMNUM;
! 5643: fprintf(f, ":%d", mem);
! 5644: break;
! 5645: case ARG_OPTION:
! 5646: {
! 5647: OnigOptionType option = *((OnigOptionType* )bp);
! 5648: bp += SIZE_OPTION;
! 5649: fprintf(f, ":%d", option);
! 5650: }
! 5651: break;
! 5652:
! 5653: case ARG_STATE_CHECK:
! 5654: scn = *((StateCheckNumType* )bp);
! 5655: bp += SIZE_STATE_CHECK_NUM;
! 5656: fprintf(f, ":%d", scn);
! 5657: break;
! 5658: }
! 5659: }
! 5660: else {
! 5661: switch (*bp++) {
! 5662: case OP_EXACT1:
! 5663: case OP_ANYCHAR_STAR_PEEK_NEXT:
! 5664: case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
! 5665: p_string(f, 1, bp++); break;
! 5666: case OP_EXACT2:
! 5667: p_string(f, 2, bp); bp += 2; break;
! 5668: case OP_EXACT3:
! 5669: p_string(f, 3, bp); bp += 3; break;
! 5670: case OP_EXACT4:
! 5671: p_string(f, 4, bp); bp += 4; break;
! 5672: case OP_EXACT5:
! 5673: p_string(f, 5, bp); bp += 5; break;
! 5674: case OP_EXACTN:
! 5675: GET_LENGTH_INC(len, bp);
! 5676: p_len_string(f, len, 1, bp);
! 5677: bp += len;
! 5678: break;
! 5679:
! 5680: case OP_EXACTMB2N1:
! 5681: p_string(f, 2, bp); bp += 2; break;
! 5682: case OP_EXACTMB2N2:
! 5683: p_string(f, 4, bp); bp += 4; break;
! 5684: case OP_EXACTMB2N3:
! 5685: p_string(f, 6, bp); bp += 6; break;
! 5686: case OP_EXACTMB2N:
! 5687: GET_LENGTH_INC(len, bp);
! 5688: p_len_string(f, len, 2, bp);
! 5689: bp += len * 2;
! 5690: break;
! 5691: case OP_EXACTMB3N:
! 5692: GET_LENGTH_INC(len, bp);
! 5693: p_len_string(f, len, 3, bp);
! 5694: bp += len * 3;
! 5695: break;
! 5696: case OP_EXACTMBN:
! 5697: {
! 5698: int mb_len;
! 5699:
! 5700: GET_LENGTH_INC(mb_len, bp);
! 5701: GET_LENGTH_INC(len, bp);
! 5702: fprintf(f, ":%d:%d:", mb_len, len);
! 5703: n = len * mb_len;
! 5704: while (n-- > 0) { fputc(*bp++, f); }
! 5705: }
! 5706: break;
! 5707:
! 5708: case OP_EXACT1_IC:
! 5709: len = enc_len(enc, bp);
! 5710: p_string(f, len, bp);
! 5711: bp += len;
! 5712: break;
! 5713: case OP_EXACTN_IC:
! 5714: GET_LENGTH_INC(len, bp);
! 5715: p_len_string(f, len, 1, bp);
! 5716: bp += len;
! 5717: break;
! 5718:
! 5719: case OP_CCLASS:
! 5720: n = bitset_on_num((BitSetRef )bp);
! 5721: bp += SIZE_BITSET;
! 5722: fprintf(f, ":%d", n);
! 5723: break;
! 5724:
! 5725: case OP_CCLASS_NOT:
! 5726: n = bitset_on_num((BitSetRef )bp);
! 5727: bp += SIZE_BITSET;
! 5728: fprintf(f, ":%d", n);
! 5729: break;
! 5730:
! 5731: case OP_CCLASS_MB:
! 5732: case OP_CCLASS_MB_NOT:
! 5733: GET_LENGTH_INC(len, bp);
! 5734: q = bp;
! 5735: #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
! 5736: ALIGNMENT_RIGHT(q);
! 5737: #endif
! 5738: GET_CODE_POINT(code, q);
! 5739: bp += len;
! 5740: fprintf(f, ":%d:%d", (int )code, len);
! 5741: break;
! 5742:
! 5743: case OP_CCLASS_MIX:
! 5744: case OP_CCLASS_MIX_NOT:
! 5745: n = bitset_on_num((BitSetRef )bp);
! 5746: bp += SIZE_BITSET;
! 5747: GET_LENGTH_INC(len, bp);
! 5748: q = bp;
! 5749: #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
! 5750: ALIGNMENT_RIGHT(q);
! 5751: #endif
! 5752: GET_CODE_POINT(code, q);
! 5753: bp += len;
! 5754: fprintf(f, ":%d:%d:%d", n, (int )code, len);
! 5755: break;
! 5756:
! 5757: case OP_CCLASS_NODE:
! 5758: {
! 5759: CClassNode *cc;
! 5760:
! 5761: GET_POINTER_INC(cc, bp);
! 5762: n = bitset_on_num(cc->bs);
! 5763: fprintf(f, ":%u:%d", (unsigned int )cc, n);
! 5764: }
! 5765: break;
! 5766:
! 5767: case OP_BACKREFN_IC:
! 5768: mem = *((MemNumType* )bp);
! 5769: bp += SIZE_MEMNUM;
! 5770: fprintf(f, ":%d", mem);
! 5771: break;
! 5772:
! 5773: case OP_BACKREF_MULTI_IC:
! 5774: case OP_BACKREF_MULTI:
! 5775: fputs(" ", f);
! 5776: GET_LENGTH_INC(len, bp);
! 5777: for (i = 0; i < len; i++) {
! 5778: GET_MEMNUM_INC(mem, bp);
! 5779: if (i > 0) fputs(", ", f);
! 5780: fprintf(f, "%d", mem);
! 5781: }
! 5782: break;
! 5783:
! 5784: case OP_BACKREF_AT_LEVEL:
! 5785: {
! 5786: OnigOptionType option;
! 5787: LengthType level;
! 5788:
! 5789: GET_OPTION_INC(option, bp);
! 5790: fprintf(f, ":%d", option);
! 5791: GET_LENGTH_INC(level, bp);
! 5792: fprintf(f, ":%d", level);
! 5793:
! 5794: fputs(" ", f);
! 5795: GET_LENGTH_INC(len, bp);
! 5796: for (i = 0; i < len; i++) {
! 5797: GET_MEMNUM_INC(mem, bp);
! 5798: if (i > 0) fputs(", ", f);
! 5799: fprintf(f, "%d", mem);
! 5800: }
! 5801: }
! 5802: break;
! 5803:
! 5804: case OP_REPEAT:
! 5805: case OP_REPEAT_NG:
! 5806: {
! 5807: mem = *((MemNumType* )bp);
! 5808: bp += SIZE_MEMNUM;
! 5809: addr = *((RelAddrType* )bp);
! 5810: bp += SIZE_RELADDR;
! 5811: fprintf(f, ":%d:%d", mem, addr);
! 5812: }
! 5813: break;
! 5814:
! 5815: case OP_PUSH_OR_JUMP_EXACT1:
! 5816: case OP_PUSH_IF_PEEK_NEXT:
! 5817: addr = *((RelAddrType* )bp);
! 5818: bp += SIZE_RELADDR;
! 5819: fprintf(f, ":(%d)", addr);
! 5820: p_string(f, 1, bp);
! 5821: bp += 1;
! 5822: break;
! 5823:
! 5824: case OP_LOOK_BEHIND:
! 5825: GET_LENGTH_INC(len, bp);
! 5826: fprintf(f, ":%d", len);
! 5827: break;
! 5828:
! 5829: case OP_PUSH_LOOK_BEHIND_NOT:
! 5830: GET_RELADDR_INC(addr, bp);
! 5831: GET_LENGTH_INC(len, bp);
! 5832: fprintf(f, ":%d:(%d)", len, addr);
! 5833: break;
! 5834:
! 5835: case OP_STATE_CHECK_PUSH:
! 5836: case OP_STATE_CHECK_PUSH_OR_JUMP:
! 5837: scn = *((StateCheckNumType* )bp);
! 5838: bp += SIZE_STATE_CHECK_NUM;
! 5839: addr = *((RelAddrType* )bp);
! 5840: bp += SIZE_RELADDR;
! 5841: fprintf(f, ":%d:(%d)", scn, addr);
! 5842: break;
! 5843:
! 5844: default:
! 5845: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
! 5846: *--bp);
! 5847: }
! 5848: }
! 5849: fputs("]", f);
! 5850: if (nextp) *nextp = bp;
! 5851: }
! 5852:
! 5853: static void
! 5854: print_compiled_byte_code_list(FILE* f, regex_t* reg)
! 5855: {
! 5856: int ncode;
! 5857: UChar* bp = reg->p;
! 5858: UChar* end = reg->p + reg->used;
! 5859:
! 5860: fprintf(f, "code length: %d\n", reg->used);
! 5861:
! 5862: ncode = 0;
! 5863: while (bp < end) {
! 5864: ncode++;
! 5865: if (bp > reg->p) {
! 5866: if (ncode % 5 == 0)
! 5867: fprintf(f, "\n");
! 5868: else
! 5869: fputs(" ", f);
! 5870: }
! 5871: onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
! 5872: }
! 5873:
! 5874: fprintf(f, "\n");
! 5875: }
! 5876:
! 5877: static void
! 5878: print_indent_tree(FILE* f, Node* node, int indent)
! 5879: {
! 5880: int i, type;
! 5881: int add = 3;
! 5882: UChar* p;
! 5883:
! 5884: Indent(f, indent);
! 5885: if (IS_NULL(node)) {
! 5886: fprintf(f, "ERROR: null node!!!\n");
! 5887: exit (0);
! 5888: }
! 5889:
! 5890: type = NTYPE(node);
! 5891: switch (type) {
! 5892: case N_LIST:
! 5893: case N_ALT:
! 5894: if (NTYPE(node) == N_LIST)
! 5895: fprintf(f, "<list:%x>\n", (int )node);
! 5896: else
! 5897: fprintf(f, "<alt:%x>\n", (int )node);
! 5898:
! 5899: print_indent_tree(f, NCONS(node).left, indent + add);
! 5900: while (IS_NOT_NULL(node = NCONS(node).right)) {
! 5901: if (NTYPE(node) != type) {
! 5902: fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
! 5903: exit(0);
! 5904: }
! 5905: print_indent_tree(f, NCONS(node).left, indent + add);
! 5906: }
! 5907: break;
! 5908:
! 5909: case N_STRING:
! 5910: fprintf(f, "<string%s:%x>",
! 5911: (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
! 5912: for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
! 5913: if (*p >= 0x20 && *p < 0x7f)
! 5914: fputc(*p, f);
! 5915: else {
! 5916: fprintf(f, " 0x%02x", *p);
! 5917: }
! 5918: }
! 5919: break;
! 5920:
! 5921: case N_CCLASS:
! 5922: fprintf(f, "<cclass:%x>", (int )node);
! 5923: if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
! 5924: if (NCCLASS(node).mbuf) {
! 5925: BBuf* bbuf = NCCLASS(node).mbuf;
! 5926: for (i = 0; i < bbuf->used; i++) {
! 5927: if (i > 0) fprintf(f, ",");
! 5928: fprintf(f, "%0x", bbuf->p[i]);
! 5929: }
! 5930: }
! 5931: break;
! 5932:
! 5933: case N_CTYPE:
! 5934: fprintf(f, "<ctype:%x> ", (int )node);
! 5935: switch (NCTYPE(node).type) {
! 5936: case CTYPE_WORD: fputs("word", f); break;
! 5937: case CTYPE_NOT_WORD: fputs("not word", f); break;
! 5938: default:
! 5939: fprintf(f, "ERROR: undefined ctype.\n");
! 5940: exit(0);
! 5941: }
! 5942: break;
! 5943:
! 5944: case N_ANYCHAR:
! 5945: fprintf(f, "<anychar:%x>", (int )node);
! 5946: break;
! 5947:
! 5948: case N_ANCHOR:
! 5949: fprintf(f, "<anchor:%x> ", (int )node);
! 5950: switch (NANCHOR(node).type) {
! 5951: case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
! 5952: case ANCHOR_END_BUF: fputs("end buf", f); break;
! 5953: case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
! 5954: case ANCHOR_END_LINE: fputs("end line", f); break;
! 5955: case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
! 5956: case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
! 5957:
! 5958: case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
! 5959: case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
! 5960: #ifdef USE_WORD_BEGIN_END
! 5961: case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
! 5962: case ANCHOR_WORD_END: fputs("word end", f); break;
! 5963: #endif
! 5964: case ANCHOR_PREC_READ: fputs("prec read", f); break;
! 5965: case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
! 5966: case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
! 5967: case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
! 5968:
! 5969: default:
! 5970: fprintf(f, "ERROR: undefined anchor type.\n");
! 5971: break;
! 5972: }
! 5973: break;
! 5974:
! 5975: case N_BACKREF:
! 5976: {
! 5977: int* p;
! 5978: BackrefNode* br = &(NBACKREF(node));
! 5979: p = BACKREFS_P(br);
! 5980: fprintf(f, "<backref:%x>", (int )node);
! 5981: for (i = 0; i < br->back_num; i++) {
! 5982: if (i > 0) fputs(", ", f);
! 5983: fprintf(f, "%d", p[i]);
! 5984: }
! 5985: }
! 5986: break;
! 5987:
! 5988: #ifdef USE_SUBEXP_CALL
! 5989: case N_CALL:
! 5990: {
! 5991: CallNode* cn = &(NCALL(node));
! 5992: fprintf(f, "<call:%x>", (int )node);
! 5993: p_string(f, cn->name_end - cn->name, cn->name);
! 5994: }
! 5995: break;
! 5996: #endif
! 5997:
! 5998: case N_QUANTIFIER:
! 5999: fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
! 6000: NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
! 6001: (NQUANTIFIER(node).greedy ? "" : "?"));
! 6002: print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
! 6003: break;
! 6004:
! 6005: case N_EFFECT:
! 6006: fprintf(f, "<effect:%x> ", (int )node);
! 6007: switch (NEFFECT(node).type) {
! 6008: case EFFECT_OPTION:
! 6009: fprintf(f, "option:%d\n", NEFFECT(node).option);
! 6010: print_indent_tree(f, NEFFECT(node).target, indent + add);
! 6011: break;
! 6012: case EFFECT_MEMORY:
! 6013: fprintf(f, "memory:%d", NEFFECT(node).regnum);
! 6014: break;
! 6015: case EFFECT_STOP_BACKTRACK:
! 6016: fprintf(f, "stop-bt");
! 6017: break;
! 6018:
! 6019: default:
! 6020: break;
! 6021: }
! 6022: fprintf(f, "\n");
! 6023: print_indent_tree(f, NEFFECT(node).target, indent + add);
! 6024: break;
! 6025:
! 6026: default:
! 6027: fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
! 6028: break;
! 6029: }
! 6030:
! 6031: if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
! 6032: type != N_EFFECT)
! 6033: fprintf(f, "\n");
! 6034: fflush(f);
! 6035: }
! 6036: #endif /* ONIG_DEBUG */
! 6037:
! 6038: #ifdef ONIG_DEBUG_PARSE_TREE
! 6039: static void
! 6040: print_tree(FILE* f, Node* node)
! 6041: {
! 6042: print_indent_tree(f, node, 0);
! 6043: }
! 6044: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>