Annotation of embedaddon/php/ext/mbstring/oniguruma/regexec.c, revision 1.1
1.1 ! misho 1: /**********************************************************************
! 2: regexec.c - Oniguruma (regular expression library)
! 3: **********************************************************************/
! 4: /*-
! 5: * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include "regint.h"
! 31:
! 32: #ifdef USE_CRNL_AS_LINE_TERMINATOR
! 33: #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
! 34: (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
! 35: ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
! 36: #endif
! 37:
! 38: #ifdef USE_CAPTURE_HISTORY
! 39: static void history_tree_free(OnigCaptureTreeNode* node);
! 40:
! 41: static void
! 42: history_tree_clear(OnigCaptureTreeNode* node)
! 43: {
! 44: int i;
! 45:
! 46: if (IS_NOT_NULL(node)) {
! 47: for (i = 0; i < node->num_childs; i++) {
! 48: if (IS_NOT_NULL(node->childs[i])) {
! 49: history_tree_free(node->childs[i]);
! 50: }
! 51: }
! 52: for (i = 0; i < node->allocated; i++) {
! 53: node->childs[i] = (OnigCaptureTreeNode* )0;
! 54: }
! 55: node->num_childs = 0;
! 56: node->beg = ONIG_REGION_NOTPOS;
! 57: node->end = ONIG_REGION_NOTPOS;
! 58: node->group = -1;
! 59: }
! 60: }
! 61:
! 62: static void
! 63: history_tree_free(OnigCaptureTreeNode* node)
! 64: {
! 65: history_tree_clear(node);
! 66: xfree(node);
! 67: }
! 68:
! 69: static void
! 70: history_root_free(OnigRegion* r)
! 71: {
! 72: if (IS_NOT_NULL(r->history_root)) {
! 73: history_tree_free(r->history_root);
! 74: r->history_root = (OnigCaptureTreeNode* )0;
! 75: }
! 76: }
! 77:
! 78: static OnigCaptureTreeNode*
! 79: history_node_new(void)
! 80: {
! 81: OnigCaptureTreeNode* node;
! 82:
! 83: node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
! 84: CHECK_NULL_RETURN(node);
! 85: node->childs = (OnigCaptureTreeNode** )0;
! 86: node->allocated = 0;
! 87: node->num_childs = 0;
! 88: node->group = -1;
! 89: node->beg = ONIG_REGION_NOTPOS;
! 90: node->end = ONIG_REGION_NOTPOS;
! 91:
! 92: return node;
! 93: }
! 94:
! 95: static int
! 96: history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
! 97: {
! 98: #define HISTORY_TREE_INIT_ALLOC_SIZE 8
! 99:
! 100: if (parent->num_childs >= parent->allocated) {
! 101: int n, i;
! 102:
! 103: if (IS_NULL(parent->childs)) {
! 104: n = HISTORY_TREE_INIT_ALLOC_SIZE;
! 105: parent->childs =
! 106: (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
! 107: }
! 108: else {
! 109: n = parent->allocated * 2;
! 110: parent->childs =
! 111: (OnigCaptureTreeNode** )xrealloc(parent->childs,
! 112: sizeof(OnigCaptureTreeNode*) * n);
! 113: }
! 114: CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
! 115: for (i = parent->allocated; i < n; i++) {
! 116: parent->childs[i] = (OnigCaptureTreeNode* )0;
! 117: }
! 118: parent->allocated = n;
! 119: }
! 120:
! 121: parent->childs[parent->num_childs] = child;
! 122: parent->num_childs++;
! 123: return 0;
! 124: }
! 125:
! 126: static OnigCaptureTreeNode*
! 127: history_tree_clone(OnigCaptureTreeNode* node)
! 128: {
! 129: int i;
! 130: OnigCaptureTreeNode *clone, *child;
! 131:
! 132: clone = history_node_new();
! 133: CHECK_NULL_RETURN(clone);
! 134:
! 135: clone->beg = node->beg;
! 136: clone->end = node->end;
! 137: for (i = 0; i < node->num_childs; i++) {
! 138: child = history_tree_clone(node->childs[i]);
! 139: if (IS_NULL(child)) {
! 140: history_tree_free(clone);
! 141: return (OnigCaptureTreeNode* )0;
! 142: }
! 143: history_tree_add_child(clone, child);
! 144: }
! 145:
! 146: return clone;
! 147: }
! 148:
! 149: extern OnigCaptureTreeNode*
! 150: onig_get_capture_tree(OnigRegion* region)
! 151: {
! 152: return region->history_root;
! 153: }
! 154: #endif /* USE_CAPTURE_HISTORY */
! 155:
! 156: extern void
! 157: onig_region_clear(OnigRegion* region)
! 158: {
! 159: int i;
! 160:
! 161: for (i = 0; i < region->num_regs; i++) {
! 162: region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
! 163: }
! 164: #ifdef USE_CAPTURE_HISTORY
! 165: history_root_free(region);
! 166: #endif
! 167: }
! 168:
! 169: extern int
! 170: onig_region_resize(OnigRegion* region, int n)
! 171: {
! 172: region->num_regs = n;
! 173:
! 174: if (n < ONIG_NREGION)
! 175: n = ONIG_NREGION;
! 176:
! 177: if (region->allocated == 0) {
! 178: region->beg = (int* )xmalloc(n * sizeof(int));
! 179: region->end = (int* )xmalloc(n * sizeof(int));
! 180:
! 181: if (region->beg == 0 || region->end == 0)
! 182: return ONIGERR_MEMORY;
! 183:
! 184: region->allocated = n;
! 185: }
! 186: else if (region->allocated < n) {
! 187: region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
! 188: region->end = (int* )xrealloc(region->end, n * sizeof(int));
! 189:
! 190: if (region->beg == 0 || region->end == 0)
! 191: return ONIGERR_MEMORY;
! 192:
! 193: region->allocated = n;
! 194: }
! 195:
! 196: return 0;
! 197: }
! 198:
! 199: extern int
! 200: onig_region_resize_clear(OnigRegion* region, int n)
! 201: {
! 202: int r;
! 203:
! 204: r = onig_region_resize(region, n);
! 205: if (r != 0) return r;
! 206: onig_region_clear(region);
! 207: return 0;
! 208: }
! 209:
! 210: extern int
! 211: onig_region_set(OnigRegion* region, int at, int beg, int end)
! 212: {
! 213: if (at < 0) return ONIGERR_INVALID_ARGUMENT;
! 214:
! 215: if (at >= region->allocated) {
! 216: int r = onig_region_resize(region, at + 1);
! 217: if (r < 0) return r;
! 218: }
! 219:
! 220: region->beg[at] = beg;
! 221: region->end[at] = end;
! 222: return 0;
! 223: }
! 224:
! 225: extern void
! 226: onig_region_init(OnigRegion* region)
! 227: {
! 228: region->num_regs = 0;
! 229: region->allocated = 0;
! 230: region->beg = (int* )0;
! 231: region->end = (int* )0;
! 232: region->history_root = (OnigCaptureTreeNode* )0;
! 233: }
! 234:
! 235: extern OnigRegion*
! 236: onig_region_new(void)
! 237: {
! 238: OnigRegion* r;
! 239:
! 240: r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
! 241: onig_region_init(r);
! 242: return r;
! 243: }
! 244:
! 245: extern void
! 246: onig_region_free(OnigRegion* r, int free_self)
! 247: {
! 248: if (r) {
! 249: if (r->allocated > 0) {
! 250: if (r->beg) xfree(r->beg);
! 251: if (r->end) xfree(r->end);
! 252: r->allocated = 0;
! 253: }
! 254: #ifdef USE_CAPTURE_HISTORY
! 255: history_root_free(r);
! 256: #endif
! 257: if (free_self) xfree(r);
! 258: }
! 259: }
! 260:
! 261: extern void
! 262: onig_region_copy(OnigRegion* to, OnigRegion* from)
! 263: {
! 264: #define RREGC_SIZE (sizeof(int) * from->num_regs)
! 265: int i;
! 266:
! 267: if (to == from) return;
! 268:
! 269: if (to->allocated == 0) {
! 270: if (from->num_regs > 0) {
! 271: to->beg = (int* )xmalloc(RREGC_SIZE);
! 272: to->end = (int* )xmalloc(RREGC_SIZE);
! 273: to->allocated = from->num_regs;
! 274: }
! 275: }
! 276: else if (to->allocated < from->num_regs) {
! 277: to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
! 278: to->end = (int* )xrealloc(to->end, RREGC_SIZE);
! 279: to->allocated = from->num_regs;
! 280: }
! 281:
! 282: for (i = 0; i < from->num_regs; i++) {
! 283: to->beg[i] = from->beg[i];
! 284: to->end[i] = from->end[i];
! 285: }
! 286: to->num_regs = from->num_regs;
! 287:
! 288: #ifdef USE_CAPTURE_HISTORY
! 289: history_root_free(to);
! 290:
! 291: if (IS_NOT_NULL(from->history_root)) {
! 292: to->history_root = history_tree_clone(from->history_root);
! 293: }
! 294: #endif
! 295: }
! 296:
! 297:
! 298: /** stack **/
! 299: #define INVALID_STACK_INDEX -1
! 300: typedef long StackIndex;
! 301:
! 302: typedef struct _StackType {
! 303: unsigned int type;
! 304: union {
! 305: struct {
! 306: UChar *pcode; /* byte code position */
! 307: UChar *pstr; /* string position */
! 308: UChar *pstr_prev; /* previous char position of pstr */
! 309: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 310: unsigned int state_check;
! 311: #endif
! 312: } state;
! 313: struct {
! 314: int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
! 315: UChar *pcode; /* byte code position (head of repeated target) */
! 316: int num; /* repeat id */
! 317: } repeat;
! 318: struct {
! 319: StackIndex si; /* index of stack */
! 320: } repeat_inc;
! 321: struct {
! 322: int num; /* memory num */
! 323: UChar *pstr; /* start/end position */
! 324: /* Following information is setted, if this stack type is MEM-START */
! 325: StackIndex start; /* prev. info (for backtrack "(...)*" ) */
! 326: StackIndex end; /* prev. info (for backtrack "(...)*" ) */
! 327: } mem;
! 328: struct {
! 329: int num; /* null check id */
! 330: UChar *pstr; /* start position */
! 331: } null_check;
! 332: #ifdef USE_SUBEXP_CALL
! 333: struct {
! 334: UChar *ret_addr; /* byte code position */
! 335: int num; /* null check id */
! 336: UChar *pstr; /* string position */
! 337: } call_frame;
! 338: #endif
! 339: } u;
! 340: } StackType;
! 341:
! 342: /* stack type */
! 343: /* used by normal-POP */
! 344: #define STK_ALT 0x0001
! 345: #define STK_LOOK_BEHIND_NOT 0x0002
! 346: #define STK_POS_NOT 0x0003
! 347: /* handled by normal-POP */
! 348: #define STK_MEM_START 0x0100
! 349: #define STK_MEM_END 0x8200
! 350: #define STK_REPEAT_INC 0x0300
! 351: #define STK_STATE_CHECK_MARK 0x1000
! 352: /* avoided by normal-POP */
! 353: #define STK_NULL_CHECK_START 0x3000
! 354: #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
! 355: #define STK_MEM_END_MARK 0x8400
! 356: #define STK_POS 0x0500 /* used when POP-POS */
! 357: #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
! 358: #define STK_REPEAT 0x0700
! 359: #define STK_CALL_FRAME 0x0800
! 360: #define STK_RETURN 0x0900
! 361: #define STK_VOID 0x0a00 /* for fill a blank */
! 362:
! 363: /* stack type check mask */
! 364: #define STK_MASK_POP_USED 0x00ff
! 365: #define STK_MASK_TO_VOID_TARGET 0x10ff
! 366: #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
! 367:
! 368: typedef struct {
! 369: void* stack_p;
! 370: int stack_n;
! 371: OnigOptionType options;
! 372: OnigRegion* region;
! 373: const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
! 374: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
! 375: int best_len; /* for ONIG_OPTION_FIND_LONGEST */
! 376: UChar* best_s;
! 377: #endif
! 378: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 379: void* state_check_buff;
! 380: int state_check_buff_size;
! 381: #endif
! 382: } MatchArg;
! 383:
! 384: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
! 385: #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
! 386: (msa).stack_p = (void* )0;\
! 387: (msa).options = (arg_option);\
! 388: (msa).region = (arg_region);\
! 389: (msa).start = (arg_start);\
! 390: (msa).best_len = ONIG_MISMATCH;\
! 391: } while (0)
! 392: #else
! 393: #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
! 394: (msa).stack_p = (void* )0;\
! 395: (msa).options = (arg_option);\
! 396: (msa).region = (arg_region);\
! 397: (msa).start = (arg_start);\
! 398: } while (0)
! 399: #endif
! 400:
! 401: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 402:
! 403: #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
! 404:
! 405: #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
! 406: if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
! 407: unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
! 408: offset = ((offset) * (state_num)) >> 3;\
! 409: if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
! 410: if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
! 411: (msa).state_check_buff = (void* )xmalloc(size);\
! 412: else \
! 413: (msa).state_check_buff = (void* )xalloca(size);\
! 414: xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
! 415: (size_t )(size - (offset))); \
! 416: (msa).state_check_buff_size = size;\
! 417: }\
! 418: else {\
! 419: (msa).state_check_buff = (void* )0;\
! 420: (msa).state_check_buff_size = 0;\
! 421: }\
! 422: }\
! 423: else {\
! 424: (msa).state_check_buff = (void* )0;\
! 425: (msa).state_check_buff_size = 0;\
! 426: }\
! 427: } while (0)
! 428:
! 429: #define MATCH_ARG_FREE(msa) do {\
! 430: if ((msa).stack_p) xfree((msa).stack_p);\
! 431: if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
! 432: if ((msa).state_check_buff) xfree((msa).state_check_buff);\
! 433: }\
! 434: } while (0);
! 435: #else
! 436: #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
! 437: #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
! 438: #endif
! 439:
! 440:
! 441:
! 442: #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
! 443: if (msa->stack_p) {\
! 444: alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
! 445: stk_alloc = (StackType* )(msa->stack_p);\
! 446: stk_base = stk_alloc;\
! 447: stk = stk_base;\
! 448: stk_end = stk_base + msa->stack_n;\
! 449: }\
! 450: else {\
! 451: alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
! 452: + sizeof(StackType) * (stack_num));\
! 453: stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
! 454: stk_base = stk_alloc;\
! 455: stk = stk_base;\
! 456: stk_end = stk_base + (stack_num);\
! 457: }\
! 458: } while(0)
! 459:
! 460: #define STACK_SAVE do{\
! 461: if (stk_base != stk_alloc) {\
! 462: msa->stack_p = stk_base;\
! 463: msa->stack_n = stk_end - stk_base;\
! 464: };\
! 465: } while(0)
! 466:
! 467: static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
! 468:
! 469: extern unsigned int
! 470: onig_get_match_stack_limit_size(void)
! 471: {
! 472: return MatchStackLimitSize;
! 473: }
! 474:
! 475: extern int
! 476: onig_set_match_stack_limit_size(unsigned int size)
! 477: {
! 478: MatchStackLimitSize = size;
! 479: return 0;
! 480: }
! 481:
! 482: static int
! 483: stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
! 484: StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
! 485: {
! 486: unsigned int n;
! 487: StackType *x, *stk_base, *stk_end, *stk;
! 488:
! 489: stk_base = *arg_stk_base;
! 490: stk_end = *arg_stk_end;
! 491: stk = *arg_stk;
! 492:
! 493: n = stk_end - stk_base;
! 494: if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
! 495: x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
! 496: if (IS_NULL(x)) {
! 497: STACK_SAVE;
! 498: return ONIGERR_MEMORY;
! 499: }
! 500: xmemcpy(x, stk_base, n * sizeof(StackType));
! 501: n *= 2;
! 502: }
! 503: else {
! 504: n *= 2;
! 505: if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
! 506: if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
! 507: return ONIGERR_MATCH_STACK_LIMIT_OVER;
! 508: else
! 509: n = MatchStackLimitSize;
! 510: }
! 511: x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
! 512: if (IS_NULL(x)) {
! 513: STACK_SAVE;
! 514: return ONIGERR_MEMORY;
! 515: }
! 516: }
! 517: *arg_stk = x + (stk - stk_base);
! 518: *arg_stk_base = x;
! 519: *arg_stk_end = x + n;
! 520: return 0;
! 521: }
! 522:
! 523: #define STACK_ENSURE(n) do {\
! 524: if (stk_end - stk < (n)) {\
! 525: int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
! 526: if (r != 0) { STACK_SAVE; return r; } \
! 527: }\
! 528: } while(0)
! 529:
! 530: #define STACK_AT(index) (stk_base + (index))
! 531: #define GET_STACK_INDEX(stk) ((stk) - stk_base)
! 532:
! 533: #define STACK_PUSH_TYPE(stack_type) do {\
! 534: STACK_ENSURE(1);\
! 535: stk->type = (stack_type);\
! 536: STACK_INC;\
! 537: } while(0)
! 538:
! 539: #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
! 540:
! 541: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 542: #define STATE_CHECK_POS(s,snum) \
! 543: (((s) - str) * num_comb_exp_check + ((snum) - 1))
! 544: #define STATE_CHECK_VAL(v,snum) do {\
! 545: if (state_check_buff != NULL) {\
! 546: int x = STATE_CHECK_POS(s,snum);\
! 547: (v) = state_check_buff[x/8] & (1<<(x%8));\
! 548: }\
! 549: else (v) = 0;\
! 550: } while(0)
! 551:
! 552:
! 553: #define ELSE_IF_STATE_CHECK_MARK(stk) \
! 554: else if ((stk)->type == STK_STATE_CHECK_MARK) { \
! 555: int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
! 556: state_check_buff[x/8] |= (1<<(x%8)); \
! 557: }
! 558:
! 559: #define STACK_PUSH(stack_type,pat,s,sprev) do {\
! 560: STACK_ENSURE(1);\
! 561: stk->type = (stack_type);\
! 562: stk->u.state.pcode = (pat);\
! 563: stk->u.state.pstr = (s);\
! 564: stk->u.state.pstr_prev = (sprev);\
! 565: stk->u.state.state_check = 0;\
! 566: STACK_INC;\
! 567: } while(0)
! 568:
! 569: #define STACK_PUSH_ENSURED(stack_type,pat) do {\
! 570: stk->type = (stack_type);\
! 571: stk->u.state.pcode = (pat);\
! 572: stk->u.state.state_check = 0;\
! 573: STACK_INC;\
! 574: } while(0)
! 575:
! 576: #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
! 577: STACK_ENSURE(1);\
! 578: stk->type = STK_ALT;\
! 579: stk->u.state.pcode = (pat);\
! 580: stk->u.state.pstr = (s);\
! 581: stk->u.state.pstr_prev = (sprev);\
! 582: stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
! 583: STACK_INC;\
! 584: } while(0)
! 585:
! 586: #define STACK_PUSH_STATE_CHECK(s,snum) do {\
! 587: if (state_check_buff != NULL) {\
! 588: STACK_ENSURE(1);\
! 589: stk->type = STK_STATE_CHECK_MARK;\
! 590: stk->u.state.pstr = (s);\
! 591: stk->u.state.state_check = (snum);\
! 592: STACK_INC;\
! 593: }\
! 594: } while(0)
! 595:
! 596: #else /* USE_COMBINATION_EXPLOSION_CHECK */
! 597:
! 598: #define ELSE_IF_STATE_CHECK_MARK(stk)
! 599:
! 600: #define STACK_PUSH(stack_type,pat,s,sprev) do {\
! 601: STACK_ENSURE(1);\
! 602: stk->type = (stack_type);\
! 603: stk->u.state.pcode = (pat);\
! 604: stk->u.state.pstr = (s);\
! 605: stk->u.state.pstr_prev = (sprev);\
! 606: STACK_INC;\
! 607: } while(0)
! 608:
! 609: #define STACK_PUSH_ENSURED(stack_type,pat) do {\
! 610: stk->type = (stack_type);\
! 611: stk->u.state.pcode = (pat);\
! 612: STACK_INC;\
! 613: } while(0)
! 614: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
! 615:
! 616: #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
! 617: #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
! 618: #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
! 619: #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
! 620: #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
! 621: STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
! 622:
! 623: #define STACK_PUSH_REPEAT(id, pat) do {\
! 624: STACK_ENSURE(1);\
! 625: stk->type = STK_REPEAT;\
! 626: stk->u.repeat.num = (id);\
! 627: stk->u.repeat.pcode = (pat);\
! 628: stk->u.repeat.count = 0;\
! 629: STACK_INC;\
! 630: } while(0)
! 631:
! 632: #define STACK_PUSH_REPEAT_INC(sindex) do {\
! 633: STACK_ENSURE(1);\
! 634: stk->type = STK_REPEAT_INC;\
! 635: stk->u.repeat_inc.si = (sindex);\
! 636: STACK_INC;\
! 637: } while(0)
! 638:
! 639: #define STACK_PUSH_MEM_START(mnum, s) do {\
! 640: STACK_ENSURE(1);\
! 641: stk->type = STK_MEM_START;\
! 642: stk->u.mem.num = (mnum);\
! 643: stk->u.mem.pstr = (s);\
! 644: stk->u.mem.start = mem_start_stk[mnum];\
! 645: stk->u.mem.end = mem_end_stk[mnum];\
! 646: mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
! 647: mem_end_stk[mnum] = INVALID_STACK_INDEX;\
! 648: STACK_INC;\
! 649: } while(0)
! 650:
! 651: #define STACK_PUSH_MEM_END(mnum, s) do {\
! 652: STACK_ENSURE(1);\
! 653: stk->type = STK_MEM_END;\
! 654: stk->u.mem.num = (mnum);\
! 655: stk->u.mem.pstr = (s);\
! 656: stk->u.mem.start = mem_start_stk[mnum];\
! 657: stk->u.mem.end = mem_end_stk[mnum];\
! 658: mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
! 659: STACK_INC;\
! 660: } while(0)
! 661:
! 662: #define STACK_PUSH_MEM_END_MARK(mnum) do {\
! 663: STACK_ENSURE(1);\
! 664: stk->type = STK_MEM_END_MARK;\
! 665: stk->u.mem.num = (mnum);\
! 666: STACK_INC;\
! 667: } while(0)
! 668:
! 669: #define STACK_GET_MEM_START(mnum, k) do {\
! 670: int level = 0;\
! 671: k = stk;\
! 672: while (k > stk_base) {\
! 673: k--;\
! 674: if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
! 675: && k->u.mem.num == (mnum)) {\
! 676: level++;\
! 677: }\
! 678: else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
! 679: if (level == 0) break;\
! 680: level--;\
! 681: }\
! 682: }\
! 683: } while (0)
! 684:
! 685: #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
! 686: int level = 0;\
! 687: while (k < stk) {\
! 688: if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
! 689: if (level == 0) (start) = k->u.mem.pstr;\
! 690: level++;\
! 691: }\
! 692: else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
! 693: level--;\
! 694: if (level == 0) {\
! 695: (end) = k->u.mem.pstr;\
! 696: break;\
! 697: }\
! 698: }\
! 699: k++;\
! 700: }\
! 701: } while (0)
! 702:
! 703: #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
! 704: STACK_ENSURE(1);\
! 705: stk->type = STK_NULL_CHECK_START;\
! 706: stk->u.null_check.num = (cnum);\
! 707: stk->u.null_check.pstr = (s);\
! 708: STACK_INC;\
! 709: } while(0)
! 710:
! 711: #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
! 712: STACK_ENSURE(1);\
! 713: stk->type = STK_NULL_CHECK_END;\
! 714: stk->u.null_check.num = (cnum);\
! 715: STACK_INC;\
! 716: } while(0)
! 717:
! 718: #define STACK_PUSH_CALL_FRAME(pat) do {\
! 719: STACK_ENSURE(1);\
! 720: stk->type = STK_CALL_FRAME;\
! 721: stk->u.call_frame.ret_addr = (pat);\
! 722: STACK_INC;\
! 723: } while(0)
! 724:
! 725: #define STACK_PUSH_RETURN do {\
! 726: STACK_ENSURE(1);\
! 727: stk->type = STK_RETURN;\
! 728: STACK_INC;\
! 729: } while(0)
! 730:
! 731:
! 732: #ifdef ONIG_DEBUG
! 733: #define STACK_BASE_CHECK(p, at) \
! 734: if ((p) < stk_base) {\
! 735: fprintf(stderr, "at %s\n", at);\
! 736: goto stack_error;\
! 737: }
! 738: #else
! 739: #define STACK_BASE_CHECK(p, at)
! 740: #endif
! 741:
! 742: #define STACK_POP_ONE do {\
! 743: stk--;\
! 744: STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
! 745: } while(0)
! 746:
! 747: #define STACK_POP do {\
! 748: switch (pop_level) {\
! 749: case STACK_POP_LEVEL_FREE:\
! 750: while (1) {\
! 751: stk--;\
! 752: STACK_BASE_CHECK(stk, "STACK_POP"); \
! 753: if ((stk->type & STK_MASK_POP_USED) != 0) break;\
! 754: ELSE_IF_STATE_CHECK_MARK(stk);\
! 755: }\
! 756: break;\
! 757: case STACK_POP_LEVEL_MEM_START:\
! 758: while (1) {\
! 759: stk--;\
! 760: STACK_BASE_CHECK(stk, "STACK_POP 2"); \
! 761: if ((stk->type & STK_MASK_POP_USED) != 0) break;\
! 762: else if (stk->type == STK_MEM_START) {\
! 763: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 764: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 765: }\
! 766: ELSE_IF_STATE_CHECK_MARK(stk);\
! 767: }\
! 768: break;\
! 769: default:\
! 770: while (1) {\
! 771: stk--;\
! 772: STACK_BASE_CHECK(stk, "STACK_POP 3"); \
! 773: if ((stk->type & STK_MASK_POP_USED) != 0) break;\
! 774: else if (stk->type == STK_MEM_START) {\
! 775: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 776: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 777: }\
! 778: else if (stk->type == STK_REPEAT_INC) {\
! 779: STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
! 780: }\
! 781: else if (stk->type == STK_MEM_END) {\
! 782: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 783: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 784: }\
! 785: ELSE_IF_STATE_CHECK_MARK(stk);\
! 786: }\
! 787: break;\
! 788: }\
! 789: } while(0)
! 790:
! 791: #define STACK_POP_TIL_POS_NOT do {\
! 792: while (1) {\
! 793: stk--;\
! 794: STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
! 795: if (stk->type == STK_POS_NOT) break;\
! 796: else if (stk->type == STK_MEM_START) {\
! 797: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 798: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 799: }\
! 800: else if (stk->type == STK_REPEAT_INC) {\
! 801: STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
! 802: }\
! 803: else if (stk->type == STK_MEM_END) {\
! 804: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 805: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 806: }\
! 807: ELSE_IF_STATE_CHECK_MARK(stk);\
! 808: }\
! 809: } while(0)
! 810:
! 811: #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
! 812: while (1) {\
! 813: stk--;\
! 814: STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
! 815: if (stk->type == STK_LOOK_BEHIND_NOT) break;\
! 816: else if (stk->type == STK_MEM_START) {\
! 817: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 818: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 819: }\
! 820: else if (stk->type == STK_REPEAT_INC) {\
! 821: STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
! 822: }\
! 823: else if (stk->type == STK_MEM_END) {\
! 824: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
! 825: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
! 826: }\
! 827: ELSE_IF_STATE_CHECK_MARK(stk);\
! 828: }\
! 829: } while(0)
! 830:
! 831: #define STACK_POS_END(k) do {\
! 832: k = stk;\
! 833: while (1) {\
! 834: k--;\
! 835: STACK_BASE_CHECK(k, "STACK_POS_END"); \
! 836: if (IS_TO_VOID_TARGET(k)) {\
! 837: k->type = STK_VOID;\
! 838: }\
! 839: else if (k->type == STK_POS) {\
! 840: k->type = STK_VOID;\
! 841: break;\
! 842: }\
! 843: }\
! 844: } while(0)
! 845:
! 846: #define STACK_STOP_BT_END do {\
! 847: StackType *k = stk;\
! 848: while (1) {\
! 849: k--;\
! 850: STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
! 851: if (IS_TO_VOID_TARGET(k)) {\
! 852: k->type = STK_VOID;\
! 853: }\
! 854: else if (k->type == STK_STOP_BT) {\
! 855: k->type = STK_VOID;\
! 856: break;\
! 857: }\
! 858: }\
! 859: } while(0)
! 860:
! 861: #define STACK_NULL_CHECK(isnull,id,s) do {\
! 862: StackType* k = stk;\
! 863: while (1) {\
! 864: k--;\
! 865: STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
! 866: if (k->type == STK_NULL_CHECK_START) {\
! 867: if (k->u.null_check.num == (id)) {\
! 868: (isnull) = (k->u.null_check.pstr == (s));\
! 869: break;\
! 870: }\
! 871: }\
! 872: }\
! 873: } while(0)
! 874:
! 875: #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
! 876: int level = 0;\
! 877: StackType* k = stk;\
! 878: while (1) {\
! 879: k--;\
! 880: STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
! 881: if (k->type == STK_NULL_CHECK_START) {\
! 882: if (k->u.null_check.num == (id)) {\
! 883: if (level == 0) {\
! 884: (isnull) = (k->u.null_check.pstr == (s));\
! 885: break;\
! 886: }\
! 887: else level--;\
! 888: }\
! 889: }\
! 890: else if (k->type == STK_NULL_CHECK_END) {\
! 891: level++;\
! 892: }\
! 893: }\
! 894: } while(0)
! 895:
! 896: #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
! 897: StackType* k = stk;\
! 898: while (1) {\
! 899: k--;\
! 900: STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
! 901: if (k->type == STK_NULL_CHECK_START) {\
! 902: if (k->u.null_check.num == (id)) {\
! 903: if (k->u.null_check.pstr != (s)) {\
! 904: (isnull) = 0;\
! 905: break;\
! 906: }\
! 907: else {\
! 908: UChar* endp;\
! 909: (isnull) = 1;\
! 910: while (k < stk) {\
! 911: if (k->type == STK_MEM_START) {\
! 912: if (k->u.mem.end == INVALID_STACK_INDEX) {\
! 913: (isnull) = 0; break;\
! 914: }\
! 915: if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
! 916: endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
! 917: else\
! 918: endp = (UChar* )k->u.mem.end;\
! 919: if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
! 920: (isnull) = 0; break;\
! 921: }\
! 922: else if (endp != s) {\
! 923: (isnull) = -1; /* empty, but position changed */ \
! 924: }\
! 925: }\
! 926: k++;\
! 927: }\
! 928: break;\
! 929: }\
! 930: }\
! 931: }\
! 932: }\
! 933: } while(0)
! 934:
! 935: #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
! 936: int level = 0;\
! 937: StackType* k = stk;\
! 938: while (1) {\
! 939: k--;\
! 940: STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
! 941: if (k->type == STK_NULL_CHECK_START) {\
! 942: if (k->u.null_check.num == (id)) {\
! 943: if (level == 0) {\
! 944: if (k->u.null_check.pstr != (s)) {\
! 945: (isnull) = 0;\
! 946: break;\
! 947: }\
! 948: else {\
! 949: UChar* endp;\
! 950: (isnull) = 1;\
! 951: while (k < stk) {\
! 952: if (k->type == STK_MEM_START) {\
! 953: if (k->u.mem.end == INVALID_STACK_INDEX) {\
! 954: (isnull) = 0; break;\
! 955: }\
! 956: if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
! 957: endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
! 958: else\
! 959: endp = (UChar* )k->u.mem.end;\
! 960: if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
! 961: (isnull) = 0; break;\
! 962: }\
! 963: else if (endp != s) {\
! 964: (isnull) = -1; /* empty, but position changed */ \
! 965: }\
! 966: }\
! 967: k++;\
! 968: }\
! 969: break;\
! 970: }\
! 971: }\
! 972: else {\
! 973: level--;\
! 974: }\
! 975: }\
! 976: }\
! 977: else if (k->type == STK_NULL_CHECK_END) {\
! 978: if (k->u.null_check.num == (id)) level++;\
! 979: }\
! 980: }\
! 981: } while(0)
! 982:
! 983: #define STACK_GET_REPEAT(id, k) do {\
! 984: int level = 0;\
! 985: k = stk;\
! 986: while (1) {\
! 987: k--;\
! 988: STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
! 989: if (k->type == STK_REPEAT) {\
! 990: if (level == 0) {\
! 991: if (k->u.repeat.num == (id)) {\
! 992: break;\
! 993: }\
! 994: }\
! 995: }\
! 996: else if (k->type == STK_CALL_FRAME) level--;\
! 997: else if (k->type == STK_RETURN) level++;\
! 998: }\
! 999: } while (0)
! 1000:
! 1001: #define STACK_RETURN(addr) do {\
! 1002: int level = 0;\
! 1003: StackType* k = stk;\
! 1004: while (1) {\
! 1005: k--;\
! 1006: STACK_BASE_CHECK(k, "STACK_RETURN"); \
! 1007: if (k->type == STK_CALL_FRAME) {\
! 1008: if (level == 0) {\
! 1009: (addr) = k->u.call_frame.ret_addr;\
! 1010: break;\
! 1011: }\
! 1012: else level--;\
! 1013: }\
! 1014: else if (k->type == STK_RETURN)\
! 1015: level++;\
! 1016: }\
! 1017: } while(0)
! 1018:
! 1019:
! 1020: #define STRING_CMP(s1,s2,len) do {\
! 1021: while (len-- > 0) {\
! 1022: if (*s1++ != *s2++) goto fail;\
! 1023: }\
! 1024: } while(0)
! 1025:
! 1026: #define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
! 1027: if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
! 1028: goto fail; \
! 1029: } while(0)
! 1030:
! 1031: static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
! 1032: UChar* s1, UChar** ps2, int mblen)
! 1033: {
! 1034: UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
! 1035: UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
! 1036: UChar *p1, *p2, *end, *s2, *end2;
! 1037: int len1, len2;
! 1038:
! 1039: s2 = *ps2;
! 1040: end = s1 + mblen;
! 1041: end2 = s2 + mblen;
! 1042: while (s1 < end) {
! 1043: len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
! 1044: len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
! 1045: if (len1 != len2) return 0;
! 1046: p1 = buf1;
! 1047: p2 = buf2;
! 1048: while (len1-- > 0) {
! 1049: if (*p1 != *p2) return 0;
! 1050: p1++;
! 1051: p2++;
! 1052: }
! 1053: }
! 1054:
! 1055: *ps2 = s2;
! 1056: return 1;
! 1057: }
! 1058:
! 1059: #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
! 1060: is_fail = 0;\
! 1061: while (len-- > 0) {\
! 1062: if (*s1++ != *s2++) {\
! 1063: is_fail = 1; break;\
! 1064: }\
! 1065: }\
! 1066: } while(0)
! 1067:
! 1068: #define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
! 1069: if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
! 1070: is_fail = 1; \
! 1071: else \
! 1072: is_fail = 0; \
! 1073: } while(0)
! 1074:
! 1075:
! 1076: #define ON_STR_BEGIN(s) ((s) == str)
! 1077: #define ON_STR_END(s) ((s) == end)
! 1078: #define IS_EMPTY_STR (str == end)
! 1079:
! 1080: #define DATA_ENSURE(n) \
! 1081: if (s + (n) > end) goto fail
! 1082:
! 1083: #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
! 1084:
! 1085: #ifdef USE_CAPTURE_HISTORY
! 1086: static int
! 1087: make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
! 1088: StackType* stk_top, UChar* str, regex_t* reg)
! 1089: {
! 1090: int n, r;
! 1091: OnigCaptureTreeNode* child;
! 1092: StackType* k = *kp;
! 1093:
! 1094: while (k < stk_top) {
! 1095: if (k->type == STK_MEM_START) {
! 1096: n = k->u.mem.num;
! 1097: if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
! 1098: BIT_STATUS_AT(reg->capture_history, n) != 0) {
! 1099: child = history_node_new();
! 1100: CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
! 1101: child->group = n;
! 1102: child->beg = (int )(k->u.mem.pstr - str);
! 1103: r = history_tree_add_child(node, child);
! 1104: if (r != 0) return r;
! 1105: *kp = (k + 1);
! 1106: r = make_capture_history_tree(child, kp, stk_top, str, reg);
! 1107: if (r != 0) return r;
! 1108:
! 1109: k = *kp;
! 1110: child->end = (int )(k->u.mem.pstr - str);
! 1111: }
! 1112: }
! 1113: else if (k->type == STK_MEM_END) {
! 1114: if (k->u.mem.num == node->group) {
! 1115: node->end = (int )(k->u.mem.pstr - str);
! 1116: *kp = k;
! 1117: return 0;
! 1118: }
! 1119: }
! 1120: k++;
! 1121: }
! 1122:
! 1123: return 1; /* 1: root node ending. */
! 1124: }
! 1125: #endif
! 1126:
! 1127: #ifdef USE_BACKREF_AT_LEVEL
! 1128: static int mem_is_in_memp(int mem, int num, UChar* memp)
! 1129: {
! 1130: int i;
! 1131: MemNumType m;
! 1132:
! 1133: for (i = 0; i < num; i++) {
! 1134: GET_MEMNUM_INC(m, memp);
! 1135: if (mem == (int )m) return 1;
! 1136: }
! 1137: return 0;
! 1138: }
! 1139:
! 1140: static int backref_match_at_nested_level(regex_t* reg
! 1141: , StackType* top, StackType* stk_base
! 1142: , int ignore_case, int ambig_flag
! 1143: , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
! 1144: {
! 1145: UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
! 1146: int level;
! 1147: StackType* k;
! 1148:
! 1149: level = 0;
! 1150: k = top;
! 1151: k--;
! 1152: while (k >= stk_base) {
! 1153: if (k->type == STK_CALL_FRAME) {
! 1154: level--;
! 1155: }
! 1156: else if (k->type == STK_RETURN) {
! 1157: level++;
! 1158: }
! 1159: else if (level == nest) {
! 1160: if (k->type == STK_MEM_START) {
! 1161: if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
! 1162: pstart = k->u.mem.pstr;
! 1163: if (pend != NULL_UCHARP) {
! 1164: if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
! 1165: p = pstart;
! 1166: ss = *s;
! 1167:
! 1168: if (ignore_case != 0) {
! 1169: if (string_cmp_ic(reg->enc, ambig_flag,
! 1170: pstart, &ss, (int )(pend - pstart)) == 0)
! 1171: return 0; /* or goto next_mem; */
! 1172: }
! 1173: else {
! 1174: while (p < pend) {
! 1175: if (*p++ != *ss++) return 0; /* or goto next_mem; */
! 1176: }
! 1177: }
! 1178:
! 1179: *s = ss;
! 1180: return 1;
! 1181: }
! 1182: }
! 1183: }
! 1184: else if (k->type == STK_MEM_END) {
! 1185: if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
! 1186: pend = k->u.mem.pstr;
! 1187: }
! 1188: }
! 1189: }
! 1190: k--;
! 1191: }
! 1192:
! 1193: return 0;
! 1194: }
! 1195: #endif /* USE_BACKREF_AT_LEVEL */
! 1196:
! 1197:
! 1198: #ifdef RUBY_PLATFORM
! 1199:
! 1200: typedef struct {
! 1201: int state;
! 1202: regex_t* reg;
! 1203: MatchArg* msa;
! 1204: StackType* stk_base;
! 1205: } TrapEnsureArg;
! 1206:
! 1207: static VALUE
! 1208: trap_ensure(VALUE arg)
! 1209: {
! 1210: TrapEnsureArg* ta = (TrapEnsureArg* )arg;
! 1211:
! 1212: if (ta->state == 0) { /* trap_exec() is not normal return */
! 1213: ONIG_STATE_DEC_THREAD(ta->reg);
! 1214: if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
! 1215: xfree(ta->stk_base);
! 1216:
! 1217: MATCH_ARG_FREE(*(ta->msa));
! 1218: }
! 1219:
! 1220: return Qnil;
! 1221: }
! 1222:
! 1223: static VALUE
! 1224: trap_exec(VALUE arg)
! 1225: {
! 1226: TrapEnsureArg* ta;
! 1227:
! 1228: rb_trap_exec();
! 1229:
! 1230: ta = (TrapEnsureArg* )arg;
! 1231: ta->state = 1; /* normal return */
! 1232: return Qnil;
! 1233: }
! 1234:
! 1235: extern void
! 1236: onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
! 1237: {
! 1238: VALUE arg;
! 1239: TrapEnsureArg ta;
! 1240:
! 1241: ta.state = 0;
! 1242: ta.reg = reg;
! 1243: ta.msa = msa;
! 1244: ta.stk_base = stk_base;
! 1245: arg = (VALUE )(&ta);
! 1246: rb_ensure(trap_exec, arg, trap_ensure, arg);
! 1247: }
! 1248:
! 1249: #define CHECK_INTERRUPT_IN_MATCH_AT do {\
! 1250: if (rb_trap_pending) {\
! 1251: if (! rb_prohibit_interrupt) {\
! 1252: onig_exec_trap(reg, msa, stk_base);\
! 1253: }\
! 1254: }\
! 1255: } while (0)
! 1256: #else
! 1257: #define CHECK_INTERRUPT_IN_MATCH_AT
! 1258: #endif /* RUBY_PLATFORM */
! 1259:
! 1260: #ifdef ONIG_DEBUG_STATISTICS
! 1261:
! 1262: #define USE_TIMEOFDAY
! 1263:
! 1264: #ifdef USE_TIMEOFDAY
! 1265: #ifdef HAVE_SYS_TIME_H
! 1266: #include <sys/time.h>
! 1267: #endif
! 1268: #ifdef HAVE_UNISTD_H
! 1269: #include <unistd.h>
! 1270: #endif
! 1271: static struct timeval ts, te;
! 1272: #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
! 1273: #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
! 1274: (((te).tv_sec - (ts).tv_sec)*1000000))
! 1275: #else
! 1276: #ifdef HAVE_SYS_TIMES_H
! 1277: #include <sys/times.h>
! 1278: #endif
! 1279: static struct tms ts, te;
! 1280: #define GETTIME(t) times(&(t))
! 1281: #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
! 1282: #endif
! 1283:
! 1284: static int OpCounter[256];
! 1285: static int OpPrevCounter[256];
! 1286: static unsigned long OpTime[256];
! 1287: static int OpCurr = OP_FINISH;
! 1288: static int OpPrevTarget = OP_FAIL;
! 1289: static int MaxStackDepth = 0;
! 1290:
! 1291: #define STAT_OP_IN(opcode) do {\
! 1292: if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
! 1293: OpCurr = opcode;\
! 1294: OpCounter[opcode]++;\
! 1295: GETTIME(ts);\
! 1296: } while (0)
! 1297:
! 1298: #define STAT_OP_OUT do {\
! 1299: GETTIME(te);\
! 1300: OpTime[OpCurr] += TIMEDIFF(te, ts);\
! 1301: } while (0)
! 1302:
! 1303: #ifdef RUBY_PLATFORM
! 1304:
! 1305: /*
! 1306: * :nodoc:
! 1307: */
! 1308: static VALUE onig_stat_print(void)
! 1309: {
! 1310: onig_print_statistics(stderr);
! 1311: return Qnil;
! 1312: }
! 1313: #endif
! 1314:
! 1315: extern void onig_statistics_init(void)
! 1316: {
! 1317: int i;
! 1318: for (i = 0; i < 256; i++) {
! 1319: OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
! 1320: }
! 1321: MaxStackDepth = 0;
! 1322:
! 1323: #ifdef RUBY_PLATFORM
! 1324: rb_define_global_function("onig_stat_print", onig_stat_print, 0);
! 1325: #endif
! 1326: }
! 1327:
! 1328: extern void
! 1329: onig_print_statistics(FILE* f)
! 1330: {
! 1331: int i;
! 1332: fprintf(f, " count prev time\n");
! 1333: for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
! 1334: fprintf(f, "%8d: %8d: %10ld: %s\n",
! 1335: OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
! 1336: }
! 1337: fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
! 1338: }
! 1339:
! 1340: #define STACK_INC do {\
! 1341: stk++;\
! 1342: if (stk - stk_base > MaxStackDepth) \
! 1343: MaxStackDepth = stk - stk_base;\
! 1344: } while (0)
! 1345:
! 1346: #else
! 1347: #define STACK_INC stk++
! 1348:
! 1349: #define STAT_OP_IN(opcode)
! 1350: #define STAT_OP_OUT
! 1351: #endif
! 1352:
! 1353: extern int
! 1354: onig_is_in_code_range(const UChar* p, OnigCodePoint code)
! 1355: {
! 1356: OnigCodePoint n, *data;
! 1357: OnigCodePoint low, high, x;
! 1358:
! 1359: GET_CODE_POINT(n, p);
! 1360: data = (OnigCodePoint* )p;
! 1361: data++;
! 1362:
! 1363: for (low = 0, high = n; low < high; ) {
! 1364: x = (low + high) >> 1;
! 1365: if (code > data[x * 2 + 1])
! 1366: low = x + 1;
! 1367: else
! 1368: high = x;
! 1369: }
! 1370:
! 1371: return ((low < n && code >= data[low * 2]) ? 1 : 0);
! 1372: }
! 1373:
! 1374: static int
! 1375: is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
! 1376: {
! 1377: int found;
! 1378:
! 1379: if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
! 1380: if (IS_NULL(cc->mbuf)) {
! 1381: found = 0;
! 1382: }
! 1383: else {
! 1384: found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
! 1385: }
! 1386: }
! 1387: else {
! 1388: found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
! 1389: }
! 1390:
! 1391: if (IS_CCLASS_NOT(cc))
! 1392: return !found;
! 1393: else
! 1394: return found;
! 1395: }
! 1396:
! 1397: extern int
! 1398: onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
! 1399: {
! 1400: int len;
! 1401:
! 1402: if (ONIGENC_MBC_MINLEN(enc) > 1) {
! 1403: len = 2;
! 1404: }
! 1405: else {
! 1406: len = ONIGENC_CODE_TO_MBCLEN(enc, code);
! 1407: }
! 1408: return is_code_in_cc(len, code, cc);
! 1409: }
! 1410:
! 1411:
! 1412: /* matching region of POSIX API */
! 1413: typedef int regoff_t;
! 1414:
! 1415: typedef struct {
! 1416: regoff_t rm_so;
! 1417: regoff_t rm_eo;
! 1418: } posix_regmatch_t;
! 1419:
! 1420: /* match data(str - end) from position (sstart). */
! 1421: /* if sstart == str then set sprev to NULL. */
! 1422: static int
! 1423: match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
! 1424: UChar* sprev, MatchArg* msa)
! 1425: {
! 1426: static UChar FinishCode[] = { OP_FINISH };
! 1427:
! 1428: int i, n, num_mem, best_len, pop_level;
! 1429: LengthType tlen, tlen2;
! 1430: MemNumType mem;
! 1431: RelAddrType addr;
! 1432: OnigOptionType option = reg->options;
! 1433: OnigEncoding encode = reg->enc;
! 1434: OnigAmbigType ambig_flag = reg->ambig_flag;
! 1435: UChar *s, *q, *sbegin;
! 1436: UChar *p = reg->p;
! 1437: char *alloca_base;
! 1438: StackType *stk_alloc, *stk_base, *stk, *stk_end;
! 1439: StackType *stkp; /* used as any purpose. */
! 1440: StackIndex si;
! 1441: StackIndex *repeat_stk;
! 1442: StackIndex *mem_start_stk, *mem_end_stk;
! 1443: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 1444: int scv;
! 1445: unsigned char* state_check_buff = msa->state_check_buff;
! 1446: int num_comb_exp_check = reg->num_comb_exp_check;
! 1447: #endif
! 1448: n = reg->num_repeat + reg->num_mem * 2;
! 1449:
! 1450: STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
! 1451: pop_level = reg->stack_pop_level;
! 1452: num_mem = reg->num_mem;
! 1453: repeat_stk = (StackIndex* )alloca_base;
! 1454:
! 1455: mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
! 1456: mem_end_stk = mem_start_stk + num_mem;
! 1457: mem_start_stk--; /* for index start from 1,
! 1458: mem_start_stk[1]..mem_start_stk[num_mem] */
! 1459: mem_end_stk--; /* for index start from 1,
! 1460: mem_end_stk[1]..mem_end_stk[num_mem] */
! 1461: for (i = 1; i <= num_mem; i++) {
! 1462: mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
! 1463: }
! 1464:
! 1465: #ifdef ONIG_DEBUG_MATCH
! 1466: fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
! 1467: (int )str, (int )end, (int )sstart, (int )sprev);
! 1468: fprintf(stderr, "size: %d, start offset: %d\n",
! 1469: (int )(end - str), (int )(sstart - str));
! 1470: #endif
! 1471:
! 1472: STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
! 1473: best_len = ONIG_MISMATCH;
! 1474: s = (UChar* )sstart;
! 1475: while (1) {
! 1476: #ifdef ONIG_DEBUG_MATCH
! 1477: {
! 1478: UChar *q, *bp, buf[50];
! 1479: int len;
! 1480: fprintf(stderr, "%4d> \"", (int )(s - str));
! 1481: bp = buf;
! 1482: for (i = 0, q = s; i < 7 && q < end; i++) {
! 1483: len = enc_len(encode, q);
! 1484: while (len-- > 0) *bp++ = *q++;
! 1485: }
! 1486: if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
! 1487: else { xmemcpy(bp, "\"", 1); bp += 1; }
! 1488: *bp = 0;
! 1489: fputs(buf, stderr);
! 1490: for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
! 1491: onig_print_compiled_byte_code(stderr, p, NULL, encode);
! 1492: fprintf(stderr, "\n");
! 1493: }
! 1494: #endif
! 1495:
! 1496: sbegin = s;
! 1497: switch (*p++) {
! 1498: case OP_END: STAT_OP_IN(OP_END);
! 1499: n = s - sstart;
! 1500: if (n > best_len) {
! 1501: OnigRegion* region;
! 1502: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
! 1503: if (IS_FIND_LONGEST(option)) {
! 1504: if (n > msa->best_len) {
! 1505: msa->best_len = n;
! 1506: msa->best_s = (UChar* )sstart;
! 1507: }
! 1508: else
! 1509: goto end_best_len;
! 1510: }
! 1511: #endif
! 1512: best_len = n;
! 1513: region = msa->region;
! 1514: if (region) {
! 1515: #ifdef USE_POSIX_REGION_OPTION
! 1516: if (IS_POSIX_REGION(msa->options)) {
! 1517: posix_regmatch_t* rmt = (posix_regmatch_t* )region;
! 1518:
! 1519: rmt[0].rm_so = sstart - str;
! 1520: rmt[0].rm_eo = s - str;
! 1521: for (i = 1; i <= num_mem; i++) {
! 1522: if (mem_end_stk[i] != INVALID_STACK_INDEX) {
! 1523: if (BIT_STATUS_AT(reg->bt_mem_start, i))
! 1524: rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
! 1525: else
! 1526: rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
! 1527:
! 1528: rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
! 1529: ? STACK_AT(mem_end_stk[i])->u.mem.pstr
! 1530: : (UChar* )((void* )mem_end_stk[i])) - str;
! 1531: }
! 1532: else {
! 1533: rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
! 1534: }
! 1535: }
! 1536: }
! 1537: else {
! 1538: #endif /* USE_POSIX_REGION_OPTION */
! 1539: region->beg[0] = sstart - str;
! 1540: region->end[0] = s - str;
! 1541: for (i = 1; i <= num_mem; i++) {
! 1542: if (mem_end_stk[i] != INVALID_STACK_INDEX) {
! 1543: if (BIT_STATUS_AT(reg->bt_mem_start, i))
! 1544: region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
! 1545: else
! 1546: region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
! 1547:
! 1548: region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
! 1549: ? STACK_AT(mem_end_stk[i])->u.mem.pstr
! 1550: : (UChar* )((void* )mem_end_stk[i])) - str;
! 1551: }
! 1552: else {
! 1553: region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
! 1554: }
! 1555: }
! 1556:
! 1557: #ifdef USE_CAPTURE_HISTORY
! 1558: if (reg->capture_history != 0) {
! 1559: int r;
! 1560: OnigCaptureTreeNode* node;
! 1561:
! 1562: if (IS_NULL(region->history_root)) {
! 1563: region->history_root = node = history_node_new();
! 1564: CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
! 1565: }
! 1566: else {
! 1567: node = region->history_root;
! 1568: history_tree_clear(node);
! 1569: }
! 1570:
! 1571: node->group = 0;
! 1572: node->beg = sstart - str;
! 1573: node->end = s - str;
! 1574:
! 1575: stkp = stk_base;
! 1576: r = make_capture_history_tree(region->history_root, &stkp,
! 1577: stk, (UChar* )str, reg);
! 1578: if (r < 0) {
! 1579: best_len = r; /* error code */
! 1580: goto finish;
! 1581: }
! 1582: }
! 1583: #endif /* USE_CAPTURE_HISTORY */
! 1584: #ifdef USE_POSIX_REGION_OPTION
! 1585: } /* else IS_POSIX_REGION() */
! 1586: #endif
! 1587: } /* if (region) */
! 1588: } /* n > best_len */
! 1589:
! 1590: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
! 1591: end_best_len:
! 1592: #endif
! 1593: STAT_OP_OUT;
! 1594:
! 1595: if (IS_FIND_CONDITION(option)) {
! 1596: if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
! 1597: best_len = ONIG_MISMATCH;
! 1598: goto fail; /* for retry */
! 1599: }
! 1600: if (IS_FIND_LONGEST(option) && s < end) {
! 1601: goto fail; /* for retry */
! 1602: }
! 1603: }
! 1604:
! 1605: /* default behavior: return first-matching result. */
! 1606: goto finish;
! 1607: break;
! 1608:
! 1609: case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
! 1610: #if 0
! 1611: DATA_ENSURE(1);
! 1612: if (*p != *s) goto fail;
! 1613: p++; s++;
! 1614: #endif
! 1615: if (*p != *s++) goto fail;
! 1616: DATA_ENSURE(0);
! 1617: p++;
! 1618: STAT_OP_OUT;
! 1619: break;
! 1620:
! 1621: case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
! 1622: {
! 1623: int len;
! 1624: UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
! 1625:
! 1626: DATA_ENSURE(1);
! 1627: ss = s;
! 1628: sp = p;
! 1629:
! 1630: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
! 1631: DATA_ENSURE(0);
! 1632: q = lowbuf;
! 1633: while (len-- > 0) {
! 1634: if (*p != *q) {
! 1635: goto fail;
! 1636: }
! 1637: p++; q++;
! 1638: }
! 1639: }
! 1640: STAT_OP_OUT;
! 1641: break;
! 1642:
! 1643: case OP_EXACT2: STAT_OP_IN(OP_EXACT2);
! 1644: DATA_ENSURE(2);
! 1645: if (*p != *s) goto fail;
! 1646: p++; s++;
! 1647: if (*p != *s) goto fail;
! 1648: sprev = s;
! 1649: p++; s++;
! 1650: STAT_OP_OUT;
! 1651: continue;
! 1652: break;
! 1653:
! 1654: case OP_EXACT3: STAT_OP_IN(OP_EXACT3);
! 1655: DATA_ENSURE(3);
! 1656: if (*p != *s) goto fail;
! 1657: p++; s++;
! 1658: if (*p != *s) goto fail;
! 1659: p++; s++;
! 1660: if (*p != *s) goto fail;
! 1661: sprev = s;
! 1662: p++; s++;
! 1663: STAT_OP_OUT;
! 1664: continue;
! 1665: break;
! 1666:
! 1667: case OP_EXACT4: STAT_OP_IN(OP_EXACT4);
! 1668: DATA_ENSURE(4);
! 1669: if (*p != *s) goto fail;
! 1670: p++; s++;
! 1671: if (*p != *s) goto fail;
! 1672: p++; s++;
! 1673: if (*p != *s) goto fail;
! 1674: p++; s++;
! 1675: if (*p != *s) goto fail;
! 1676: sprev = s;
! 1677: p++; s++;
! 1678: STAT_OP_OUT;
! 1679: continue;
! 1680: break;
! 1681:
! 1682: case OP_EXACT5: STAT_OP_IN(OP_EXACT5);
! 1683: DATA_ENSURE(5);
! 1684: if (*p != *s) goto fail;
! 1685: p++; s++;
! 1686: if (*p != *s) goto fail;
! 1687: p++; s++;
! 1688: if (*p != *s) goto fail;
! 1689: p++; s++;
! 1690: if (*p != *s) goto fail;
! 1691: p++; s++;
! 1692: if (*p != *s) goto fail;
! 1693: sprev = s;
! 1694: p++; s++;
! 1695: STAT_OP_OUT;
! 1696: continue;
! 1697: break;
! 1698:
! 1699: case OP_EXACTN: STAT_OP_IN(OP_EXACTN);
! 1700: GET_LENGTH_INC(tlen, p);
! 1701: DATA_ENSURE(tlen);
! 1702: while (tlen-- > 0) {
! 1703: if (*p++ != *s++) goto fail;
! 1704: }
! 1705: sprev = s - 1;
! 1706: STAT_OP_OUT;
! 1707: continue;
! 1708: break;
! 1709:
! 1710: case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
! 1711: {
! 1712: int len;
! 1713: UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
! 1714:
! 1715: GET_LENGTH_INC(tlen, p);
! 1716: endp = p + tlen;
! 1717:
! 1718: while (p < endp) {
! 1719: sprev = s;
! 1720: DATA_ENSURE(1);
! 1721: ss = s;
! 1722: sp = p;
! 1723:
! 1724: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
! 1725: DATA_ENSURE(0);
! 1726: q = lowbuf;
! 1727: while (len-- > 0) {
! 1728: if (*p != *q) {
! 1729: goto fail;
! 1730: }
! 1731: p++; q++;
! 1732: }
! 1733: }
! 1734: }
! 1735:
! 1736: STAT_OP_OUT;
! 1737: continue;
! 1738: break;
! 1739:
! 1740: case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1);
! 1741: DATA_ENSURE(2);
! 1742: if (*p != *s) goto fail;
! 1743: p++; s++;
! 1744: if (*p != *s) goto fail;
! 1745: p++; s++;
! 1746: STAT_OP_OUT;
! 1747: break;
! 1748:
! 1749: case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2);
! 1750: DATA_ENSURE(4);
! 1751: if (*p != *s) goto fail;
! 1752: p++; s++;
! 1753: if (*p != *s) goto fail;
! 1754: p++; s++;
! 1755: sprev = s;
! 1756: if (*p != *s) goto fail;
! 1757: p++; s++;
! 1758: if (*p != *s) goto fail;
! 1759: p++; s++;
! 1760: STAT_OP_OUT;
! 1761: continue;
! 1762: break;
! 1763:
! 1764: case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3);
! 1765: DATA_ENSURE(6);
! 1766: if (*p != *s) goto fail;
! 1767: p++; s++;
! 1768: if (*p != *s) goto fail;
! 1769: p++; s++;
! 1770: if (*p != *s) goto fail;
! 1771: p++; s++;
! 1772: if (*p != *s) goto fail;
! 1773: p++; s++;
! 1774: sprev = s;
! 1775: if (*p != *s) goto fail;
! 1776: p++; s++;
! 1777: if (*p != *s) goto fail;
! 1778: p++; s++;
! 1779: STAT_OP_OUT;
! 1780: continue;
! 1781: break;
! 1782:
! 1783: case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N);
! 1784: GET_LENGTH_INC(tlen, p);
! 1785: DATA_ENSURE(tlen * 2);
! 1786: while (tlen-- > 0) {
! 1787: if (*p != *s) goto fail;
! 1788: p++; s++;
! 1789: if (*p != *s) goto fail;
! 1790: p++; s++;
! 1791: }
! 1792: sprev = s - 2;
! 1793: STAT_OP_OUT;
! 1794: continue;
! 1795: break;
! 1796:
! 1797: case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N);
! 1798: GET_LENGTH_INC(tlen, p);
! 1799: DATA_ENSURE(tlen * 3);
! 1800: while (tlen-- > 0) {
! 1801: if (*p != *s) goto fail;
! 1802: p++; s++;
! 1803: if (*p != *s) goto fail;
! 1804: p++; s++;
! 1805: if (*p != *s) goto fail;
! 1806: p++; s++;
! 1807: }
! 1808: sprev = s - 3;
! 1809: STAT_OP_OUT;
! 1810: continue;
! 1811: break;
! 1812:
! 1813: case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN);
! 1814: GET_LENGTH_INC(tlen, p); /* mb-len */
! 1815: GET_LENGTH_INC(tlen2, p); /* string len */
! 1816: tlen2 *= tlen;
! 1817: DATA_ENSURE(tlen2);
! 1818: while (tlen2-- > 0) {
! 1819: if (*p != *s) goto fail;
! 1820: p++; s++;
! 1821: }
! 1822: sprev = s - tlen;
! 1823: STAT_OP_OUT;
! 1824: continue;
! 1825: break;
! 1826:
! 1827: case OP_CCLASS: STAT_OP_IN(OP_CCLASS);
! 1828: DATA_ENSURE(1);
! 1829: if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
! 1830: p += SIZE_BITSET;
! 1831: s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
! 1832: STAT_OP_OUT;
! 1833: break;
! 1834:
! 1835: case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
! 1836: if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
! 1837:
! 1838: cclass_mb:
! 1839: GET_LENGTH_INC(tlen, p);
! 1840: {
! 1841: OnigCodePoint code;
! 1842: UChar *ss;
! 1843: int mb_len;
! 1844:
! 1845: DATA_ENSURE(1);
! 1846: mb_len = enc_len(encode, s);
! 1847: DATA_ENSURE(mb_len);
! 1848: ss = s;
! 1849: s += mb_len;
! 1850: code = ONIGENC_MBC_TO_CODE(encode, ss, s);
! 1851:
! 1852: #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
! 1853: if (! onig_is_in_code_range(p, code)) goto fail;
! 1854: #else
! 1855: q = p;
! 1856: ALIGNMENT_RIGHT(q);
! 1857: if (! onig_is_in_code_range(q, code)) goto fail;
! 1858: #endif
! 1859: }
! 1860: p += tlen;
! 1861: STAT_OP_OUT;
! 1862: break;
! 1863:
! 1864: case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
! 1865: DATA_ENSURE(1);
! 1866: if (ONIGENC_IS_MBC_HEAD(encode, s)) {
! 1867: p += SIZE_BITSET;
! 1868: goto cclass_mb;
! 1869: }
! 1870: else {
! 1871: if (BITSET_AT(((BitSetRef )p), *s) == 0)
! 1872: goto fail;
! 1873:
! 1874: p += SIZE_BITSET;
! 1875: GET_LENGTH_INC(tlen, p);
! 1876: p += tlen;
! 1877: s++;
! 1878: }
! 1879: STAT_OP_OUT;
! 1880: break;
! 1881:
! 1882: case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT);
! 1883: DATA_ENSURE(1);
! 1884: if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
! 1885: p += SIZE_BITSET;
! 1886: s += enc_len(encode, s);
! 1887: STAT_OP_OUT;
! 1888: break;
! 1889:
! 1890: case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
! 1891: DATA_ENSURE(1);
! 1892: if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
! 1893: s++;
! 1894: GET_LENGTH_INC(tlen, p);
! 1895: p += tlen;
! 1896: goto cc_mb_not_success;
! 1897: }
! 1898:
! 1899: cclass_mb_not:
! 1900: GET_LENGTH_INC(tlen, p);
! 1901: {
! 1902: OnigCodePoint code;
! 1903: UChar *ss;
! 1904: int mb_len = enc_len(encode, s);
! 1905:
! 1906: if (s + mb_len > end) {
! 1907: DATA_ENSURE(1);
! 1908: s = (UChar* )end;
! 1909: p += tlen;
! 1910: goto cc_mb_not_success;
! 1911: }
! 1912:
! 1913: ss = s;
! 1914: s += mb_len;
! 1915: code = ONIGENC_MBC_TO_CODE(encode, ss, s);
! 1916:
! 1917: #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
! 1918: if (onig_is_in_code_range(p, code)) goto fail;
! 1919: #else
! 1920: q = p;
! 1921: ALIGNMENT_RIGHT(q);
! 1922: if (onig_is_in_code_range(q, code)) goto fail;
! 1923: #endif
! 1924: }
! 1925: p += tlen;
! 1926:
! 1927: cc_mb_not_success:
! 1928: STAT_OP_OUT;
! 1929: break;
! 1930:
! 1931: case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
! 1932: DATA_ENSURE(1);
! 1933: if (ONIGENC_IS_MBC_HEAD(encode, s)) {
! 1934: p += SIZE_BITSET;
! 1935: goto cclass_mb_not;
! 1936: }
! 1937: else {
! 1938: if (BITSET_AT(((BitSetRef )p), *s) != 0)
! 1939: goto fail;
! 1940:
! 1941: p += SIZE_BITSET;
! 1942: GET_LENGTH_INC(tlen, p);
! 1943: p += tlen;
! 1944: s++;
! 1945: }
! 1946: STAT_OP_OUT;
! 1947: break;
! 1948:
! 1949: case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
! 1950: {
! 1951: OnigCodePoint code;
! 1952: void *node;
! 1953: int mb_len;
! 1954: UChar *ss;
! 1955:
! 1956: DATA_ENSURE(1);
! 1957: GET_POINTER_INC(node, p);
! 1958: mb_len = enc_len(encode, s);
! 1959: ss = s;
! 1960: s += mb_len;
! 1961: DATA_ENSURE(0);
! 1962: code = ONIGENC_MBC_TO_CODE(encode, ss, s);
! 1963: if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
! 1964: }
! 1965: STAT_OP_OUT;
! 1966: break;
! 1967:
! 1968: case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
! 1969: DATA_ENSURE(1);
! 1970: n = enc_len(encode, s);
! 1971: DATA_ENSURE(n);
! 1972: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
! 1973: s += n;
! 1974: STAT_OP_OUT;
! 1975: break;
! 1976:
! 1977: case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
! 1978: DATA_ENSURE(1);
! 1979: n = enc_len(encode, s);
! 1980: DATA_ENSURE(n);
! 1981: s += n;
! 1982: STAT_OP_OUT;
! 1983: break;
! 1984:
! 1985: case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
! 1986: while (s < end) {
! 1987: STACK_PUSH_ALT(p, s, sprev);
! 1988: n = enc_len(encode, s);
! 1989: DATA_ENSURE(n);
! 1990: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
! 1991: sprev = s;
! 1992: s += n;
! 1993: }
! 1994: STAT_OP_OUT;
! 1995: break;
! 1996:
! 1997: case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
! 1998: while (s < end) {
! 1999: STACK_PUSH_ALT(p, s, sprev);
! 2000: n = enc_len(encode, s);
! 2001: if (n > 1) {
! 2002: DATA_ENSURE(n);
! 2003: sprev = s;
! 2004: s += n;
! 2005: }
! 2006: else {
! 2007: sprev = s;
! 2008: s++;
! 2009: }
! 2010: }
! 2011: STAT_OP_OUT;
! 2012: break;
! 2013:
! 2014: case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
! 2015: while (s < end) {
! 2016: if (*p == *s) {
! 2017: STACK_PUSH_ALT(p + 1, s, sprev);
! 2018: }
! 2019: n = enc_len(encode, s);
! 2020: DATA_ENSURE(n);
! 2021: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
! 2022: sprev = s;
! 2023: s += n;
! 2024: }
! 2025: p++;
! 2026: STAT_OP_OUT;
! 2027: break;
! 2028:
! 2029: case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
! 2030: while (s < end) {
! 2031: if (*p == *s) {
! 2032: STACK_PUSH_ALT(p + 1, s, sprev);
! 2033: }
! 2034: n = enc_len(encode, s);
! 2035: if (n >1) {
! 2036: DATA_ENSURE(n);
! 2037: sprev = s;
! 2038: s += n;
! 2039: }
! 2040: else {
! 2041: sprev = s;
! 2042: s++;
! 2043: }
! 2044: }
! 2045: p++;
! 2046: STAT_OP_OUT;
! 2047: break;
! 2048:
! 2049: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 2050: case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
! 2051: GET_STATE_CHECK_NUM_INC(mem, p);
! 2052: while (s < end) {
! 2053: STATE_CHECK_VAL(scv, mem);
! 2054: if (scv) goto fail;
! 2055:
! 2056: STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
! 2057: n = enc_len(encode, s);
! 2058: DATA_ENSURE(n);
! 2059: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
! 2060: sprev = s;
! 2061: s += n;
! 2062: }
! 2063: STAT_OP_OUT;
! 2064: break;
! 2065:
! 2066: case OP_STATE_CHECK_ANYCHAR_ML_STAR:
! 2067: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
! 2068:
! 2069: GET_STATE_CHECK_NUM_INC(mem, p);
! 2070: while (s < end) {
! 2071: STATE_CHECK_VAL(scv, mem);
! 2072: if (scv) goto fail;
! 2073:
! 2074: STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
! 2075: n = enc_len(encode, s);
! 2076: if (n > 1) {
! 2077: DATA_ENSURE(n);
! 2078: sprev = s;
! 2079: s += n;
! 2080: }
! 2081: else {
! 2082: sprev = s;
! 2083: s++;
! 2084: }
! 2085: }
! 2086: STAT_OP_OUT;
! 2087: break;
! 2088: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
! 2089:
! 2090: case OP_WORD: STAT_OP_IN(OP_WORD);
! 2091: DATA_ENSURE(1);
! 2092: if (! ONIGENC_IS_MBC_WORD(encode, s, end))
! 2093: goto fail;
! 2094:
! 2095: s += enc_len(encode, s);
! 2096: STAT_OP_OUT;
! 2097: break;
! 2098:
! 2099: case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
! 2100: DATA_ENSURE(1);
! 2101: if (ONIGENC_IS_MBC_WORD(encode, s, end))
! 2102: goto fail;
! 2103:
! 2104: s += enc_len(encode, s);
! 2105: STAT_OP_OUT;
! 2106: break;
! 2107:
! 2108: case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND);
! 2109: if (ON_STR_BEGIN(s)) {
! 2110: DATA_ENSURE(1);
! 2111: if (! ONIGENC_IS_MBC_WORD(encode, s, end))
! 2112: goto fail;
! 2113: }
! 2114: else if (ON_STR_END(s)) {
! 2115: if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
! 2116: goto fail;
! 2117: }
! 2118: else {
! 2119: if (ONIGENC_IS_MBC_WORD(encode, s, end)
! 2120: == ONIGENC_IS_MBC_WORD(encode, sprev, end))
! 2121: goto fail;
! 2122: }
! 2123: STAT_OP_OUT;
! 2124: continue;
! 2125: break;
! 2126:
! 2127: case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
! 2128: if (ON_STR_BEGIN(s)) {
! 2129: if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
! 2130: goto fail;
! 2131: }
! 2132: else if (ON_STR_END(s)) {
! 2133: if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
! 2134: goto fail;
! 2135: }
! 2136: else {
! 2137: if (ONIGENC_IS_MBC_WORD(encode, s, end)
! 2138: != ONIGENC_IS_MBC_WORD(encode, sprev, end))
! 2139: goto fail;
! 2140: }
! 2141: STAT_OP_OUT;
! 2142: continue;
! 2143: break;
! 2144:
! 2145: #ifdef USE_WORD_BEGIN_END
! 2146: case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
! 2147: if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
! 2148: if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
! 2149: STAT_OP_OUT;
! 2150: continue;
! 2151: }
! 2152: }
! 2153: goto fail;
! 2154: break;
! 2155:
! 2156: case OP_WORD_END: STAT_OP_IN(OP_WORD_END);
! 2157: if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
! 2158: if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
! 2159: STAT_OP_OUT;
! 2160: continue;
! 2161: }
! 2162: }
! 2163: goto fail;
! 2164: break;
! 2165: #endif
! 2166:
! 2167: case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF);
! 2168: if (! ON_STR_BEGIN(s)) goto fail;
! 2169:
! 2170: STAT_OP_OUT;
! 2171: continue;
! 2172: break;
! 2173:
! 2174: case OP_END_BUF: STAT_OP_IN(OP_END_BUF);
! 2175: if (! ON_STR_END(s)) goto fail;
! 2176:
! 2177: STAT_OP_OUT;
! 2178: continue;
! 2179: break;
! 2180:
! 2181: case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE);
! 2182: if (ON_STR_BEGIN(s)) {
! 2183: if (IS_NOTBOL(msa->options)) goto fail;
! 2184: STAT_OP_OUT;
! 2185: continue;
! 2186: }
! 2187: else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
! 2188: STAT_OP_OUT;
! 2189: continue;
! 2190: }
! 2191: goto fail;
! 2192: break;
! 2193:
! 2194: case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
! 2195: if (ON_STR_END(s)) {
! 2196: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
! 2197: if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
! 2198: #endif
! 2199: if (IS_NOTEOL(msa->options)) goto fail;
! 2200: STAT_OP_OUT;
! 2201: continue;
! 2202: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
! 2203: }
! 2204: #endif
! 2205: }
! 2206: else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
! 2207: STAT_OP_OUT;
! 2208: continue;
! 2209: }
! 2210: #ifdef USE_CRNL_AS_LINE_TERMINATOR
! 2211: else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
! 2212: STAT_OP_OUT;
! 2213: continue;
! 2214: }
! 2215: #endif
! 2216: goto fail;
! 2217: break;
! 2218:
! 2219: case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
! 2220: if (ON_STR_END(s)) {
! 2221: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
! 2222: if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
! 2223: #endif
! 2224: if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
! 2225: STAT_OP_OUT;
! 2226: continue;
! 2227: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
! 2228: }
! 2229: #endif
! 2230: }
! 2231: else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
! 2232: ON_STR_END(s + enc_len(encode, s))) {
! 2233: STAT_OP_OUT;
! 2234: continue;
! 2235: }
! 2236: #ifdef USE_CRNL_AS_LINE_TERMINATOR
! 2237: else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
! 2238: UChar* ss = s + enc_len(encode, s);
! 2239: if (ON_STR_END(ss + enc_len(encode, ss))) {
! 2240: STAT_OP_OUT;
! 2241: continue;
! 2242: }
! 2243: }
! 2244: #endif
! 2245: goto fail;
! 2246: break;
! 2247:
! 2248: case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION);
! 2249: if (s != msa->start)
! 2250: goto fail;
! 2251:
! 2252: STAT_OP_OUT;
! 2253: continue;
! 2254: break;
! 2255:
! 2256: case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH);
! 2257: GET_MEMNUM_INC(mem, p);
! 2258: STACK_PUSH_MEM_START(mem, s);
! 2259: STAT_OP_OUT;
! 2260: continue;
! 2261: break;
! 2262:
! 2263: case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
! 2264: GET_MEMNUM_INC(mem, p);
! 2265: mem_start_stk[mem] = (StackIndex )((void* )s);
! 2266: STAT_OP_OUT;
! 2267: continue;
! 2268: break;
! 2269:
! 2270: case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH);
! 2271: GET_MEMNUM_INC(mem, p);
! 2272: STACK_PUSH_MEM_END(mem, s);
! 2273: STAT_OP_OUT;
! 2274: continue;
! 2275: break;
! 2276:
! 2277: case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
! 2278: GET_MEMNUM_INC(mem, p);
! 2279: mem_end_stk[mem] = (StackIndex )((void* )s);
! 2280: STAT_OP_OUT;
! 2281: continue;
! 2282: break;
! 2283:
! 2284: #ifdef USE_SUBEXP_CALL
! 2285: case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
! 2286: GET_MEMNUM_INC(mem, p);
! 2287: STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
! 2288: STACK_PUSH_MEM_END(mem, s);
! 2289: mem_start_stk[mem] = GET_STACK_INDEX(stkp);
! 2290: STAT_OP_OUT;
! 2291: continue;
! 2292: break;
! 2293:
! 2294: case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
! 2295: GET_MEMNUM_INC(mem, p);
! 2296: mem_end_stk[mem] = (StackIndex )((void* )s);
! 2297: STACK_GET_MEM_START(mem, stkp);
! 2298:
! 2299: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
! 2300: mem_start_stk[mem] = GET_STACK_INDEX(stkp);
! 2301: else
! 2302: mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
! 2303:
! 2304: STACK_PUSH_MEM_END_MARK(mem);
! 2305: STAT_OP_OUT;
! 2306: continue;
! 2307: break;
! 2308: #endif
! 2309:
! 2310: case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1);
! 2311: mem = 1;
! 2312: goto backref;
! 2313: break;
! 2314:
! 2315: case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2);
! 2316: mem = 2;
! 2317: goto backref;
! 2318: break;
! 2319:
! 2320: case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
! 2321: GET_MEMNUM_INC(mem, p);
! 2322: backref:
! 2323: {
! 2324: int len;
! 2325: UChar *pstart, *pend;
! 2326:
! 2327: /* if you want to remove following line,
! 2328: you should check in parse and compile time. */
! 2329: if (mem > num_mem) goto fail;
! 2330: if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
! 2331: if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
! 2332:
! 2333: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
! 2334: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
! 2335: else
! 2336: pstart = (UChar* )((void* )mem_start_stk[mem]);
! 2337:
! 2338: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
! 2339: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
! 2340: : (UChar* )((void* )mem_end_stk[mem]));
! 2341: n = pend - pstart;
! 2342: DATA_ENSURE(n);
! 2343: sprev = s;
! 2344: STRING_CMP(pstart, s, n);
! 2345: while (sprev + (len = enc_len(encode, sprev)) < s)
! 2346: sprev += len;
! 2347:
! 2348: STAT_OP_OUT;
! 2349: continue;
! 2350: }
! 2351: break;
! 2352:
! 2353: case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC);
! 2354: GET_MEMNUM_INC(mem, p);
! 2355: {
! 2356: int len;
! 2357: UChar *pstart, *pend;
! 2358:
! 2359: /* if you want to remove following line,
! 2360: you should check in parse and compile time. */
! 2361: if (mem > num_mem) goto fail;
! 2362: if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
! 2363: if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
! 2364:
! 2365: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
! 2366: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
! 2367: else
! 2368: pstart = (UChar* )((void* )mem_start_stk[mem]);
! 2369:
! 2370: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
! 2371: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
! 2372: : (UChar* )((void* )mem_end_stk[mem]));
! 2373: n = pend - pstart;
! 2374: DATA_ENSURE(n);
! 2375: sprev = s;
! 2376: STRING_CMP_IC(ambig_flag, pstart, &s, n);
! 2377: while (sprev + (len = enc_len(encode, sprev)) < s)
! 2378: sprev += len;
! 2379:
! 2380: STAT_OP_OUT;
! 2381: continue;
! 2382: }
! 2383: break;
! 2384:
! 2385: case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI);
! 2386: {
! 2387: int len, is_fail;
! 2388: UChar *pstart, *pend, *swork;
! 2389:
! 2390: GET_LENGTH_INC(tlen, p);
! 2391: for (i = 0; i < tlen; i++) {
! 2392: GET_MEMNUM_INC(mem, p);
! 2393:
! 2394: if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
! 2395: if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
! 2396:
! 2397: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
! 2398: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
! 2399: else
! 2400: pstart = (UChar* )((void* )mem_start_stk[mem]);
! 2401:
! 2402: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
! 2403: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
! 2404: : (UChar* )((void* )mem_end_stk[mem]));
! 2405: n = pend - pstart;
! 2406: DATA_ENSURE(n);
! 2407: sprev = s;
! 2408: swork = s;
! 2409: STRING_CMP_VALUE(pstart, swork, n, is_fail);
! 2410: if (is_fail) continue;
! 2411: s = swork;
! 2412: while (sprev + (len = enc_len(encode, sprev)) < s)
! 2413: sprev += len;
! 2414:
! 2415: p += (SIZE_MEMNUM * (tlen - i - 1));
! 2416: break; /* success */
! 2417: }
! 2418: if (i == tlen) goto fail;
! 2419: STAT_OP_OUT;
! 2420: continue;
! 2421: }
! 2422: break;
! 2423:
! 2424: case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC);
! 2425: {
! 2426: int len, is_fail;
! 2427: UChar *pstart, *pend, *swork;
! 2428:
! 2429: GET_LENGTH_INC(tlen, p);
! 2430: for (i = 0; i < tlen; i++) {
! 2431: GET_MEMNUM_INC(mem, p);
! 2432:
! 2433: if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
! 2434: if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
! 2435:
! 2436: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
! 2437: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
! 2438: else
! 2439: pstart = (UChar* )((void* )mem_start_stk[mem]);
! 2440:
! 2441: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
! 2442: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
! 2443: : (UChar* )((void* )mem_end_stk[mem]));
! 2444: n = pend - pstart;
! 2445: DATA_ENSURE(n);
! 2446: sprev = s;
! 2447: swork = s;
! 2448: STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
! 2449: if (is_fail) continue;
! 2450: s = swork;
! 2451: while (sprev + (len = enc_len(encode, sprev)) < s)
! 2452: sprev += len;
! 2453:
! 2454: p += (SIZE_MEMNUM * (tlen - i - 1));
! 2455: break; /* success */
! 2456: }
! 2457: if (i == tlen) goto fail;
! 2458: STAT_OP_OUT;
! 2459: continue;
! 2460: }
! 2461: break;
! 2462:
! 2463: #ifdef USE_BACKREF_AT_LEVEL
! 2464: case OP_BACKREF_AT_LEVEL:
! 2465: {
! 2466: int len;
! 2467: OnigOptionType ic;
! 2468: LengthType level;
! 2469:
! 2470: GET_OPTION_INC(ic, p);
! 2471: GET_LENGTH_INC(level, p);
! 2472: GET_LENGTH_INC(tlen, p);
! 2473:
! 2474: sprev = s;
! 2475: if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
! 2476: , (int )level, (int )tlen, p, &s, end)) {
! 2477: while (sprev + (len = enc_len(encode, sprev)) < s)
! 2478: sprev += len;
! 2479:
! 2480: p += (SIZE_MEMNUM * tlen);
! 2481: }
! 2482: else
! 2483: goto fail;
! 2484:
! 2485: STAT_OP_OUT;
! 2486: continue;
! 2487: }
! 2488:
! 2489: break;
! 2490: #endif
! 2491:
! 2492: case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
! 2493: GET_OPTION_INC(option, p);
! 2494: STACK_PUSH_ALT(p, s, sprev);
! 2495: p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
! 2496: STAT_OP_OUT;
! 2497: continue;
! 2498: break;
! 2499:
! 2500: case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
! 2501: GET_OPTION_INC(option, p);
! 2502: STAT_OP_OUT;
! 2503: continue;
! 2504: break;
! 2505:
! 2506: case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START);
! 2507: GET_MEMNUM_INC(mem, p); /* mem: null check id */
! 2508: STACK_PUSH_NULL_CHECK_START(mem, s);
! 2509: STAT_OP_OUT;
! 2510: continue;
! 2511: break;
! 2512:
! 2513: case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END);
! 2514: {
! 2515: int isnull;
! 2516:
! 2517: GET_MEMNUM_INC(mem, p); /* mem: null check id */
! 2518: STACK_NULL_CHECK(isnull, mem, s);
! 2519: if (isnull) {
! 2520: #ifdef ONIG_DEBUG_MATCH
! 2521: fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
! 2522: (int )mem, (int )s);
! 2523: #endif
! 2524: null_check_found:
! 2525: /* empty loop founded, skip next instruction */
! 2526: switch (*p++) {
! 2527: case OP_JUMP:
! 2528: case OP_PUSH:
! 2529: p += SIZE_RELADDR;
! 2530: break;
! 2531: case OP_REPEAT_INC:
! 2532: case OP_REPEAT_INC_NG:
! 2533: case OP_REPEAT_INC_SG:
! 2534: case OP_REPEAT_INC_NG_SG:
! 2535: p += SIZE_MEMNUM;
! 2536: break;
! 2537: default:
! 2538: goto unexpected_bytecode_error;
! 2539: break;
! 2540: }
! 2541: }
! 2542: }
! 2543: STAT_OP_OUT;
! 2544: continue;
! 2545: break;
! 2546:
! 2547: #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
! 2548: case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
! 2549: {
! 2550: int isnull;
! 2551:
! 2552: GET_MEMNUM_INC(mem, p); /* mem: null check id */
! 2553: STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
! 2554: if (isnull) {
! 2555: #ifdef ONIG_DEBUG_MATCH
! 2556: fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
! 2557: (int )mem, (int )s);
! 2558: #endif
! 2559: if (isnull == -1) goto fail;
! 2560: goto null_check_found;
! 2561: }
! 2562: }
! 2563: STAT_OP_OUT;
! 2564: continue;
! 2565: break;
! 2566: #endif
! 2567:
! 2568: #ifdef USE_SUBEXP_CALL
! 2569: case OP_NULL_CHECK_END_MEMST_PUSH:
! 2570: STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
! 2571: {
! 2572: int isnull;
! 2573:
! 2574: GET_MEMNUM_INC(mem, p); /* mem: null check id */
! 2575: #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
! 2576: STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
! 2577: #else
! 2578: STACK_NULL_CHECK_REC(isnull, mem, s);
! 2579: #endif
! 2580: if (isnull) {
! 2581: #ifdef ONIG_DEBUG_MATCH
! 2582: fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
! 2583: (int )mem, (int )s);
! 2584: #endif
! 2585: if (isnull == -1) goto fail;
! 2586: goto null_check_found;
! 2587: }
! 2588: else {
! 2589: STACK_PUSH_NULL_CHECK_END(mem);
! 2590: }
! 2591: }
! 2592: STAT_OP_OUT;
! 2593: continue;
! 2594: break;
! 2595: #endif
! 2596:
! 2597: case OP_JUMP: STAT_OP_IN(OP_JUMP);
! 2598: GET_RELADDR_INC(addr, p);
! 2599: p += addr;
! 2600: STAT_OP_OUT;
! 2601: CHECK_INTERRUPT_IN_MATCH_AT;
! 2602: continue;
! 2603: break;
! 2604:
! 2605: case OP_PUSH: STAT_OP_IN(OP_PUSH);
! 2606: GET_RELADDR_INC(addr, p);
! 2607: STACK_PUSH_ALT(p + addr, s, sprev);
! 2608: STAT_OP_OUT;
! 2609: continue;
! 2610: break;
! 2611:
! 2612: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 2613: case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
! 2614: GET_STATE_CHECK_NUM_INC(mem, p);
! 2615: STATE_CHECK_VAL(scv, mem);
! 2616: if (scv) goto fail;
! 2617:
! 2618: GET_RELADDR_INC(addr, p);
! 2619: STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
! 2620: STAT_OP_OUT;
! 2621: continue;
! 2622: break;
! 2623:
! 2624: case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
! 2625: GET_STATE_CHECK_NUM_INC(mem, p);
! 2626: GET_RELADDR_INC(addr, p);
! 2627: STATE_CHECK_VAL(scv, mem);
! 2628: if (scv) {
! 2629: p += addr;
! 2630: }
! 2631: else {
! 2632: STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
! 2633: }
! 2634: STAT_OP_OUT;
! 2635: continue;
! 2636: break;
! 2637:
! 2638: case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
! 2639: GET_STATE_CHECK_NUM_INC(mem, p);
! 2640: STATE_CHECK_VAL(scv, mem);
! 2641: if (scv) goto fail;
! 2642:
! 2643: STACK_PUSH_STATE_CHECK(s, mem);
! 2644: STAT_OP_OUT;
! 2645: continue;
! 2646: break;
! 2647: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
! 2648:
! 2649: case OP_POP: STAT_OP_IN(OP_POP);
! 2650: STACK_POP_ONE;
! 2651: STAT_OP_OUT;
! 2652: continue;
! 2653: break;
! 2654:
! 2655: case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
! 2656: GET_RELADDR_INC(addr, p);
! 2657: if (*p == *s && DATA_ENSURE_CHECK(1)) {
! 2658: p++;
! 2659: STACK_PUSH_ALT(p + addr, s, sprev);
! 2660: STAT_OP_OUT;
! 2661: continue;
! 2662: }
! 2663: p += (addr + 1);
! 2664: STAT_OP_OUT;
! 2665: continue;
! 2666: break;
! 2667:
! 2668: case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
! 2669: GET_RELADDR_INC(addr, p);
! 2670: if (*p == *s) {
! 2671: p++;
! 2672: STACK_PUSH_ALT(p + addr, s, sprev);
! 2673: STAT_OP_OUT;
! 2674: continue;
! 2675: }
! 2676: p++;
! 2677: STAT_OP_OUT;
! 2678: continue;
! 2679: break;
! 2680:
! 2681: case OP_REPEAT: STAT_OP_IN(OP_REPEAT);
! 2682: {
! 2683: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
! 2684: GET_RELADDR_INC(addr, p);
! 2685:
! 2686: STACK_ENSURE(1);
! 2687: repeat_stk[mem] = GET_STACK_INDEX(stk);
! 2688: STACK_PUSH_REPEAT(mem, p);
! 2689:
! 2690: if (reg->repeat_range[mem].lower == 0) {
! 2691: STACK_PUSH_ALT(p + addr, s, sprev);
! 2692: }
! 2693: }
! 2694: STAT_OP_OUT;
! 2695: continue;
! 2696: break;
! 2697:
! 2698: case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG);
! 2699: {
! 2700: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
! 2701: GET_RELADDR_INC(addr, p);
! 2702:
! 2703: STACK_ENSURE(1);
! 2704: repeat_stk[mem] = GET_STACK_INDEX(stk);
! 2705: STACK_PUSH_REPEAT(mem, p);
! 2706:
! 2707: if (reg->repeat_range[mem].lower == 0) {
! 2708: STACK_PUSH_ALT(p, s, sprev);
! 2709: p += addr;
! 2710: }
! 2711: }
! 2712: STAT_OP_OUT;
! 2713: continue;
! 2714: break;
! 2715:
! 2716: case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
! 2717: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
! 2718: si = repeat_stk[mem];
! 2719: stkp = STACK_AT(si);
! 2720:
! 2721: repeat_inc:
! 2722: stkp->u.repeat.count++;
! 2723: if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
! 2724: /* end of repeat. Nothing to do. */
! 2725: }
! 2726: else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
! 2727: STACK_PUSH_ALT(p, s, sprev);
! 2728: p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
! 2729: }
! 2730: else {
! 2731: p = stkp->u.repeat.pcode;
! 2732: }
! 2733: STACK_PUSH_REPEAT_INC(si);
! 2734: STAT_OP_OUT;
! 2735: CHECK_INTERRUPT_IN_MATCH_AT;
! 2736: continue;
! 2737: break;
! 2738:
! 2739: case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG);
! 2740: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
! 2741: STACK_GET_REPEAT(mem, stkp);
! 2742: si = GET_STACK_INDEX(stkp);
! 2743: goto repeat_inc;
! 2744: break;
! 2745:
! 2746: case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
! 2747: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
! 2748: si = repeat_stk[mem];
! 2749: stkp = STACK_AT(si);
! 2750:
! 2751: repeat_inc_ng:
! 2752: stkp->u.repeat.count++;
! 2753: if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
! 2754: if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
! 2755: UChar* pcode = stkp->u.repeat.pcode;
! 2756:
! 2757: STACK_PUSH_REPEAT_INC(si);
! 2758: STACK_PUSH_ALT(pcode, s, sprev);
! 2759: }
! 2760: else {
! 2761: p = stkp->u.repeat.pcode;
! 2762: STACK_PUSH_REPEAT_INC(si);
! 2763: }
! 2764: }
! 2765: else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
! 2766: STACK_PUSH_REPEAT_INC(si);
! 2767: }
! 2768: STAT_OP_OUT;
! 2769: CHECK_INTERRUPT_IN_MATCH_AT;
! 2770: continue;
! 2771: break;
! 2772:
! 2773: case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG);
! 2774: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
! 2775: STACK_GET_REPEAT(mem, stkp);
! 2776: si = GET_STACK_INDEX(stkp);
! 2777: goto repeat_inc_ng;
! 2778: break;
! 2779:
! 2780: case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
! 2781: STACK_PUSH_POS(s, sprev);
! 2782: STAT_OP_OUT;
! 2783: continue;
! 2784: break;
! 2785:
! 2786: case OP_POP_POS: STAT_OP_IN(OP_POP_POS);
! 2787: {
! 2788: STACK_POS_END(stkp);
! 2789: s = stkp->u.state.pstr;
! 2790: sprev = stkp->u.state.pstr_prev;
! 2791: }
! 2792: STAT_OP_OUT;
! 2793: continue;
! 2794: break;
! 2795:
! 2796: case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT);
! 2797: GET_RELADDR_INC(addr, p);
! 2798: STACK_PUSH_POS_NOT(p + addr, s, sprev);
! 2799: STAT_OP_OUT;
! 2800: continue;
! 2801: break;
! 2802:
! 2803: case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS);
! 2804: STACK_POP_TIL_POS_NOT;
! 2805: goto fail;
! 2806: break;
! 2807:
! 2808: case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT);
! 2809: STACK_PUSH_STOP_BT;
! 2810: STAT_OP_OUT;
! 2811: continue;
! 2812: break;
! 2813:
! 2814: case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT);
! 2815: STACK_STOP_BT_END;
! 2816: STAT_OP_OUT;
! 2817: continue;
! 2818: break;
! 2819:
! 2820: case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
! 2821: GET_LENGTH_INC(tlen, p);
! 2822: s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
! 2823: if (IS_NULL(s)) goto fail;
! 2824: sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
! 2825: STAT_OP_OUT;
! 2826: continue;
! 2827: break;
! 2828:
! 2829: case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
! 2830: GET_RELADDR_INC(addr, p);
! 2831: GET_LENGTH_INC(tlen, p);
! 2832: q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
! 2833: if (IS_NULL(q)) {
! 2834: /* too short case -> success. ex. /(?<!XXX)a/.match("a")
! 2835: If you want to change to fail, replace following line. */
! 2836: p += addr;
! 2837: /* goto fail; */
! 2838: }
! 2839: else {
! 2840: STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
! 2841: s = q;
! 2842: sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
! 2843: }
! 2844: STAT_OP_OUT;
! 2845: continue;
! 2846: break;
! 2847:
! 2848: case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
! 2849: STACK_POP_TIL_LOOK_BEHIND_NOT;
! 2850: goto fail;
! 2851: break;
! 2852:
! 2853: #ifdef USE_SUBEXP_CALL
! 2854: case OP_CALL: STAT_OP_IN(OP_CALL);
! 2855: GET_ABSADDR_INC(addr, p);
! 2856: STACK_PUSH_CALL_FRAME(p);
! 2857: p = reg->p + addr;
! 2858: STAT_OP_OUT;
! 2859: continue;
! 2860: break;
! 2861:
! 2862: case OP_RETURN: STAT_OP_IN(OP_RETURN);
! 2863: STACK_RETURN(p);
! 2864: STACK_PUSH_RETURN;
! 2865: STAT_OP_OUT;
! 2866: continue;
! 2867: break;
! 2868: #endif
! 2869:
! 2870: case OP_FINISH:
! 2871: goto finish;
! 2872: break;
! 2873:
! 2874: fail:
! 2875: STAT_OP_OUT;
! 2876: /* fall */
! 2877: case OP_FAIL: STAT_OP_IN(OP_FAIL);
! 2878: STACK_POP;
! 2879: p = stk->u.state.pcode;
! 2880: s = stk->u.state.pstr;
! 2881: sprev = stk->u.state.pstr_prev;
! 2882:
! 2883: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 2884: if (stk->u.state.state_check != 0) {
! 2885: stk->type = STK_STATE_CHECK_MARK;
! 2886: stk++;
! 2887: }
! 2888: #endif
! 2889:
! 2890: STAT_OP_OUT;
! 2891: continue;
! 2892: break;
! 2893:
! 2894: default:
! 2895: goto bytecode_error;
! 2896:
! 2897: } /* end of switch */
! 2898: sprev = sbegin;
! 2899: } /* end of while(1) */
! 2900:
! 2901: finish:
! 2902: STACK_SAVE;
! 2903: return best_len;
! 2904:
! 2905: #ifdef ONIG_DEBUG
! 2906: stack_error:
! 2907: STACK_SAVE;
! 2908: return ONIGERR_STACK_BUG;
! 2909: #endif
! 2910:
! 2911: bytecode_error:
! 2912: STACK_SAVE;
! 2913: return ONIGERR_UNDEFINED_BYTECODE;
! 2914:
! 2915: unexpected_bytecode_error:
! 2916: STACK_SAVE;
! 2917: return ONIGERR_UNEXPECTED_BYTECODE;
! 2918: }
! 2919:
! 2920:
! 2921: static UChar*
! 2922: slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
! 2923: const UChar* text, const UChar* text_end, UChar* text_range)
! 2924: {
! 2925: UChar *t, *p, *s, *end;
! 2926:
! 2927: end = (UChar* )text_end;
! 2928: end -= target_end - target - 1;
! 2929: if (end > text_range)
! 2930: end = text_range;
! 2931:
! 2932: s = (UChar* )text;
! 2933:
! 2934: while (s < end) {
! 2935: if (*s == *target) {
! 2936: p = s + 1;
! 2937: t = target + 1;
! 2938: while (t < target_end) {
! 2939: if (*t != *p++)
! 2940: break;
! 2941: t++;
! 2942: }
! 2943: if (t == target_end)
! 2944: return s;
! 2945: }
! 2946: s += enc_len(enc, s);
! 2947: }
! 2948:
! 2949: return (UChar* )NULL;
! 2950: }
! 2951:
! 2952: static int
! 2953: str_lower_case_match(OnigEncoding enc, int ambig_flag,
! 2954: const UChar* t, const UChar* tend,
! 2955: const UChar* p, const UChar* end)
! 2956: {
! 2957: int lowlen;
! 2958: UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
! 2959: const UChar* tsave;
! 2960: const UChar* psave;
! 2961:
! 2962: tsave = t;
! 2963: psave = p;
! 2964:
! 2965: while (t < tend) {
! 2966: lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
! 2967: q = lowbuf;
! 2968: while (lowlen > 0) {
! 2969: if (*t++ != *q++) {
! 2970: return 0;
! 2971: }
! 2972: lowlen--;
! 2973: }
! 2974: }
! 2975:
! 2976: return 1;
! 2977: }
! 2978:
! 2979: static UChar*
! 2980: slow_search_ic(OnigEncoding enc, int ambig_flag,
! 2981: UChar* target, UChar* target_end,
! 2982: const UChar* text, const UChar* text_end, UChar* text_range)
! 2983: {
! 2984: UChar *s, *end;
! 2985:
! 2986: end = (UChar* )text_end;
! 2987: end -= target_end - target - 1;
! 2988: if (end > text_range)
! 2989: end = text_range;
! 2990:
! 2991: s = (UChar* )text;
! 2992:
! 2993: while (s < end) {
! 2994: if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
! 2995: return s;
! 2996:
! 2997: s += enc_len(enc, s);
! 2998: }
! 2999:
! 3000: return (UChar* )NULL;
! 3001: }
! 3002:
! 3003: static UChar*
! 3004: slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
! 3005: const UChar* text, const UChar* adjust_text,
! 3006: const UChar* text_end, const UChar* text_start)
! 3007: {
! 3008: UChar *t, *p, *s;
! 3009:
! 3010: s = (UChar* )text_end;
! 3011: s -= (target_end - target);
! 3012: if (s > text_start)
! 3013: s = (UChar* )text_start;
! 3014: else
! 3015: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
! 3016:
! 3017: while (s >= text) {
! 3018: if (*s == *target) {
! 3019: p = s + 1;
! 3020: t = target + 1;
! 3021: while (t < target_end) {
! 3022: if (*t != *p++)
! 3023: break;
! 3024: t++;
! 3025: }
! 3026: if (t == target_end)
! 3027: return s;
! 3028: }
! 3029: s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
! 3030: }
! 3031:
! 3032: return (UChar* )NULL;
! 3033: }
! 3034:
! 3035: static UChar*
! 3036: slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
! 3037: UChar* target, UChar* target_end,
! 3038: const UChar* text, const UChar* adjust_text,
! 3039: const UChar* text_end, const UChar* text_start)
! 3040: {
! 3041: UChar *s;
! 3042:
! 3043: s = (UChar* )text_end;
! 3044: s -= (target_end - target);
! 3045: if (s > text_start)
! 3046: s = (UChar* )text_start;
! 3047: else
! 3048: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
! 3049:
! 3050: while (s >= text) {
! 3051: if (str_lower_case_match(enc, ambig_flag,
! 3052: target, target_end, s, text_end))
! 3053: return s;
! 3054:
! 3055: s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
! 3056: }
! 3057:
! 3058: return (UChar* )NULL;
! 3059: }
! 3060:
! 3061: static UChar*
! 3062: bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
! 3063: const UChar* text, const UChar* text_end,
! 3064: const UChar* text_range)
! 3065: {
! 3066: const UChar *s, *se, *t, *p, *end;
! 3067: const UChar *tail;
! 3068: int skip, tlen1;
! 3069:
! 3070: #ifdef ONIG_DEBUG_SEARCH
! 3071: fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
! 3072: (int )text, (int )text_end, (int )text_range);
! 3073: #endif
! 3074:
! 3075: tail = target_end - 1;
! 3076: tlen1 = tail - target;
! 3077: end = text_range;
! 3078: if (end + tlen1 > text_end)
! 3079: end = text_end - tlen1;
! 3080:
! 3081: s = text;
! 3082:
! 3083: if (IS_NULL(reg->int_map)) {
! 3084: while (s < end) {
! 3085: p = se = s + tlen1;
! 3086: t = tail;
! 3087: while (t >= target && *p == *t) {
! 3088: p--; t--;
! 3089: }
! 3090: if (t < target) return (UChar* )s;
! 3091:
! 3092: skip = reg->map[*se];
! 3093: t = s;
! 3094: do {
! 3095: s += enc_len(reg->enc, s);
! 3096: } while ((s - t) < skip && s < end);
! 3097: }
! 3098: }
! 3099: else {
! 3100: while (s < end) {
! 3101: p = se = s + tlen1;
! 3102: t = tail;
! 3103: while (t >= target && *p == *t) {
! 3104: p--; t--;
! 3105: }
! 3106: if (t < target) return (UChar* )s;
! 3107:
! 3108: skip = reg->int_map[*se];
! 3109: t = s;
! 3110: do {
! 3111: s += enc_len(reg->enc, s);
! 3112: } while ((s - t) < skip && s < end);
! 3113: }
! 3114: }
! 3115:
! 3116: return (UChar* )NULL;
! 3117: }
! 3118:
! 3119: static UChar*
! 3120: bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
! 3121: const UChar* text, const UChar* text_end, const UChar* text_range)
! 3122: {
! 3123: const UChar *s, *t, *p, *end;
! 3124: const UChar *tail;
! 3125:
! 3126: end = text_range + (target_end - target) - 1;
! 3127: if (end > text_end)
! 3128: end = text_end;
! 3129:
! 3130: tail = target_end - 1;
! 3131: s = text + (target_end - target) - 1;
! 3132: if (IS_NULL(reg->int_map)) {
! 3133: while (s < end) {
! 3134: p = s;
! 3135: t = tail;
! 3136: while (t >= target && *p == *t) {
! 3137: p--; t--;
! 3138: }
! 3139: if (t < target) return (UChar* )(p + 1);
! 3140: s += reg->map[*s];
! 3141: }
! 3142: }
! 3143: else { /* see int_map[] */
! 3144: while (s < end) {
! 3145: p = s;
! 3146: t = tail;
! 3147: while (t >= target && *p == *t) {
! 3148: p--; t--;
! 3149: }
! 3150: if (t < target) return (UChar* )(p + 1);
! 3151: s += reg->int_map[*s];
! 3152: }
! 3153: }
! 3154: return (UChar* )NULL;
! 3155: }
! 3156:
! 3157: static int
! 3158: set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
! 3159:
! 3160: {
! 3161: int i, len;
! 3162:
! 3163: if (IS_NULL(*skip)) {
! 3164: *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
! 3165: if (IS_NULL(*skip)) return ONIGERR_MEMORY;
! 3166: }
! 3167:
! 3168: len = end - s;
! 3169: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
! 3170: (*skip)[i] = len;
! 3171:
! 3172: for (i = len - 1; i > 0; i--)
! 3173: (*skip)[s[i]] = i;
! 3174:
! 3175: return 0;
! 3176: }
! 3177:
! 3178: static UChar*
! 3179: bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
! 3180: const UChar* text, const UChar* adjust_text,
! 3181: const UChar* text_end, const UChar* text_start)
! 3182: {
! 3183: const UChar *s, *t, *p;
! 3184:
! 3185: s = text_end - (target_end - target);
! 3186: if (text_start < s)
! 3187: s = text_start;
! 3188: else
! 3189: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
! 3190:
! 3191: while (s >= text) {
! 3192: p = s;
! 3193: t = target;
! 3194: while (t < target_end && *p == *t) {
! 3195: p++; t++;
! 3196: }
! 3197: if (t == target_end)
! 3198: return (UChar* )s;
! 3199:
! 3200: s -= reg->int_map_backward[*s];
! 3201: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
! 3202: }
! 3203:
! 3204: return (UChar* )NULL;
! 3205: }
! 3206:
! 3207: static UChar*
! 3208: map_search(OnigEncoding enc, UChar map[],
! 3209: const UChar* text, const UChar* text_range)
! 3210: {
! 3211: const UChar *s = text;
! 3212:
! 3213: while (s < text_range) {
! 3214: if (map[*s]) return (UChar* )s;
! 3215:
! 3216: s += enc_len(enc, s);
! 3217: }
! 3218: return (UChar* )NULL;
! 3219: }
! 3220:
! 3221: static UChar*
! 3222: map_search_backward(OnigEncoding enc, UChar map[],
! 3223: const UChar* text, const UChar* adjust_text,
! 3224: const UChar* text_start)
! 3225: {
! 3226: const UChar *s = text_start;
! 3227:
! 3228: while (s >= text) {
! 3229: if (map[*s]) return (UChar* )s;
! 3230:
! 3231: s = onigenc_get_prev_char_head(enc, adjust_text, s);
! 3232: }
! 3233: return (UChar* )NULL;
! 3234: }
! 3235:
! 3236: extern int
! 3237: onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
! 3238: OnigOptionType option)
! 3239: {
! 3240: int r;
! 3241: UChar *prev;
! 3242: MatchArg msa;
! 3243:
! 3244: #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
! 3245: start:
! 3246: THREAD_ATOMIC_START;
! 3247: if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
! 3248: ONIG_STATE_INC(reg);
! 3249: if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
! 3250: onig_chain_reduce(reg);
! 3251: ONIG_STATE_INC(reg);
! 3252: }
! 3253: }
! 3254: else {
! 3255: int n;
! 3256:
! 3257: THREAD_ATOMIC_END;
! 3258: n = 0;
! 3259: while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
! 3260: if (++n > THREAD_PASS_LIMIT_COUNT)
! 3261: return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
! 3262: THREAD_PASS;
! 3263: }
! 3264: goto start;
! 3265: }
! 3266: THREAD_ATOMIC_END;
! 3267: #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
! 3268:
! 3269: MATCH_ARG_INIT(msa, option, region, at);
! 3270: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 3271: {
! 3272: int offset = at - str;
! 3273: STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
! 3274: }
! 3275: #endif
! 3276:
! 3277: if (region
! 3278: #ifdef USE_POSIX_REGION_OPTION
! 3279: && !IS_POSIX_REGION(option)
! 3280: #endif
! 3281: ) {
! 3282: r = onig_region_resize_clear(region, reg->num_mem + 1);
! 3283: }
! 3284: else
! 3285: r = 0;
! 3286:
! 3287: if (r == 0) {
! 3288: prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
! 3289: r = match_at(reg, str, end, at, prev, &msa);
! 3290: }
! 3291:
! 3292: MATCH_ARG_FREE(msa);
! 3293: ONIG_STATE_DEC_THREAD(reg);
! 3294: return r;
! 3295: }
! 3296:
! 3297: static int
! 3298: forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
! 3299: UChar* range, UChar** low, UChar** high, UChar** low_prev)
! 3300: {
! 3301: UChar *p, *pprev = (UChar* )NULL;
! 3302:
! 3303: #ifdef ONIG_DEBUG_SEARCH
! 3304: fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
! 3305: (int )str, (int )end, (int )s, (int )range);
! 3306: #endif
! 3307:
! 3308: p = s;
! 3309: if (reg->dmin > 0) {
! 3310: if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
! 3311: p += reg->dmin;
! 3312: }
! 3313: else {
! 3314: UChar *q = p + reg->dmin;
! 3315: while (p < q) p += enc_len(reg->enc, p);
! 3316: }
! 3317: }
! 3318:
! 3319: retry:
! 3320: switch (reg->optimize) {
! 3321: case ONIG_OPTIMIZE_EXACT:
! 3322: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
! 3323: break;
! 3324: case ONIG_OPTIMIZE_EXACT_IC:
! 3325: p = slow_search_ic(reg->enc, reg->ambig_flag,
! 3326: reg->exact, reg->exact_end, p, end, range);
! 3327: break;
! 3328:
! 3329: case ONIG_OPTIMIZE_EXACT_BM:
! 3330: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
! 3331: break;
! 3332:
! 3333: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
! 3334: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
! 3335: break;
! 3336:
! 3337: case ONIG_OPTIMIZE_MAP:
! 3338: p = map_search(reg->enc, reg->map, p, range);
! 3339: break;
! 3340: }
! 3341:
! 3342: if (p && p < range) {
! 3343: if (p - reg->dmin < s) {
! 3344: retry_gate:
! 3345: pprev = p;
! 3346: p += enc_len(reg->enc, p);
! 3347: goto retry;
! 3348: }
! 3349:
! 3350: if (reg->sub_anchor) {
! 3351: UChar* prev;
! 3352:
! 3353: switch (reg->sub_anchor) {
! 3354: case ANCHOR_BEGIN_LINE:
! 3355: if (!ON_STR_BEGIN(p)) {
! 3356: prev = onigenc_get_prev_char_head(reg->enc,
! 3357: (pprev ? pprev : str), p);
! 3358: if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
! 3359: goto retry_gate;
! 3360: }
! 3361: break;
! 3362:
! 3363: case ANCHOR_END_LINE:
! 3364: if (ON_STR_END(p)) {
! 3365: prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
! 3366: (pprev ? pprev : str), p);
! 3367: if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
! 3368: goto retry_gate;
! 3369: }
! 3370: else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
! 3371: #ifdef USE_CRNL_AS_LINE_TERMINATOR
! 3372: && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
! 3373: #endif
! 3374: )
! 3375: goto retry_gate;
! 3376: break;
! 3377: }
! 3378: }
! 3379:
! 3380: if (reg->dmax == 0) {
! 3381: *low = p;
! 3382: if (low_prev) {
! 3383: if (*low > s)
! 3384: *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
! 3385: else
! 3386: *low_prev = onigenc_get_prev_char_head(reg->enc,
! 3387: (pprev ? pprev : str), p);
! 3388: }
! 3389: }
! 3390: else {
! 3391: if (reg->dmax != ONIG_INFINITE_DISTANCE) {
! 3392: *low = p - reg->dmax;
! 3393: if (*low > s) {
! 3394: *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
! 3395: *low, (const UChar** )low_prev);
! 3396: if (low_prev && IS_NULL(*low_prev))
! 3397: *low_prev = onigenc_get_prev_char_head(reg->enc,
! 3398: (pprev ? pprev : s), *low);
! 3399: }
! 3400: else {
! 3401: if (low_prev)
! 3402: *low_prev = onigenc_get_prev_char_head(reg->enc,
! 3403: (pprev ? pprev : str), *low);
! 3404: }
! 3405: }
! 3406: }
! 3407: /* no needs to adjust *high, *high is used as range check only */
! 3408: *high = p - reg->dmin;
! 3409:
! 3410: #ifdef ONIG_DEBUG_SEARCH
! 3411: fprintf(stderr,
! 3412: "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
! 3413: (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
! 3414: #endif
! 3415: return 1; /* success */
! 3416: }
! 3417:
! 3418: return 0; /* fail */
! 3419: }
! 3420:
! 3421: static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
! 3422: int** skip));
! 3423:
! 3424: #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
! 3425:
! 3426: static int
! 3427: backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
! 3428: UChar* s, const UChar* range, UChar* adjrange,
! 3429: UChar** low, UChar** high)
! 3430: {
! 3431: int r;
! 3432: UChar *p;
! 3433:
! 3434: range += reg->dmin;
! 3435: p = s;
! 3436:
! 3437: retry:
! 3438: switch (reg->optimize) {
! 3439: case ONIG_OPTIMIZE_EXACT:
! 3440: exact_method:
! 3441: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
! 3442: range, adjrange, end, p);
! 3443: break;
! 3444:
! 3445: case ONIG_OPTIMIZE_EXACT_IC:
! 3446: p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
! 3447: reg->exact, reg->exact_end,
! 3448: range, adjrange, end, p);
! 3449: break;
! 3450:
! 3451: case ONIG_OPTIMIZE_EXACT_BM:
! 3452: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
! 3453: if (IS_NULL(reg->int_map_backward)) {
! 3454: if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
! 3455: goto exact_method;
! 3456:
! 3457: r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
! 3458: &(reg->int_map_backward));
! 3459: if (r) return r;
! 3460: }
! 3461: p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
! 3462: end, p);
! 3463: break;
! 3464:
! 3465: case ONIG_OPTIMIZE_MAP:
! 3466: p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
! 3467: break;
! 3468: }
! 3469:
! 3470: if (p) {
! 3471: if (reg->sub_anchor) {
! 3472: UChar* prev;
! 3473:
! 3474: switch (reg->sub_anchor) {
! 3475: case ANCHOR_BEGIN_LINE:
! 3476: if (!ON_STR_BEGIN(p)) {
! 3477: prev = onigenc_get_prev_char_head(reg->enc, str, p);
! 3478: if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
! 3479: p = prev;
! 3480: goto retry;
! 3481: }
! 3482: }
! 3483: break;
! 3484:
! 3485: case ANCHOR_END_LINE:
! 3486: if (ON_STR_END(p)) {
! 3487: prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
! 3488: if (IS_NULL(prev)) goto fail;
! 3489: if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
! 3490: p = prev;
! 3491: goto retry;
! 3492: }
! 3493: }
! 3494: else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
! 3495: #ifdef USE_CRNL_AS_LINE_TERMINATOR
! 3496: && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
! 3497: #endif
! 3498: ) {
! 3499: p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
! 3500: if (IS_NULL(p)) goto fail;
! 3501: goto retry;
! 3502: }
! 3503: break;
! 3504: }
! 3505: }
! 3506:
! 3507: /* no needs to adjust *high, *high is used as range check only */
! 3508: if (reg->dmax != ONIG_INFINITE_DISTANCE) {
! 3509: *low = p - reg->dmax;
! 3510: *high = p - reg->dmin;
! 3511: *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
! 3512: }
! 3513:
! 3514: #ifdef ONIG_DEBUG_SEARCH
! 3515: fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
! 3516: (int )(*low - str), (int )(*high - str));
! 3517: #endif
! 3518: return 1; /* success */
! 3519: }
! 3520:
! 3521: fail:
! 3522: #ifdef ONIG_DEBUG_SEARCH
! 3523: fprintf(stderr, "backward_search_range: fail.\n");
! 3524: #endif
! 3525: return 0; /* fail */
! 3526: }
! 3527:
! 3528:
! 3529: extern int
! 3530: onig_search(regex_t* reg, const UChar* str, const UChar* end,
! 3531: const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
! 3532: {
! 3533: int r;
! 3534: UChar *s, *prev;
! 3535: MatchArg msa;
! 3536: const UChar *orig_start = start;
! 3537:
! 3538: #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
! 3539: start:
! 3540: THREAD_ATOMIC_START;
! 3541: if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
! 3542: ONIG_STATE_INC(reg);
! 3543: if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
! 3544: onig_chain_reduce(reg);
! 3545: ONIG_STATE_INC(reg);
! 3546: }
! 3547: }
! 3548: else {
! 3549: int n;
! 3550:
! 3551: THREAD_ATOMIC_END;
! 3552: n = 0;
! 3553: while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
! 3554: if (++n > THREAD_PASS_LIMIT_COUNT)
! 3555: return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
! 3556: THREAD_PASS;
! 3557: }
! 3558: goto start;
! 3559: }
! 3560: THREAD_ATOMIC_END;
! 3561: #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
! 3562:
! 3563: #ifdef ONIG_DEBUG_SEARCH
! 3564: fprintf(stderr,
! 3565: "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
! 3566: (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
! 3567: #endif
! 3568:
! 3569: if (region
! 3570: #ifdef USE_POSIX_REGION_OPTION
! 3571: && !IS_POSIX_REGION(option)
! 3572: #endif
! 3573: ) {
! 3574: r = onig_region_resize_clear(region, reg->num_mem + 1);
! 3575: if (r) goto finish_no_msa;
! 3576: }
! 3577:
! 3578: if (start > end || start < str) goto mismatch_no_msa;
! 3579:
! 3580: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
! 3581: #define MATCH_AND_RETURN_CHECK \
! 3582: r = match_at(reg, str, end, s, prev, &msa);\
! 3583: if (r != ONIG_MISMATCH) {\
! 3584: if (r >= 0) {\
! 3585: if (! IS_FIND_LONGEST(reg->options)) {\
! 3586: goto match;\
! 3587: }\
! 3588: }\
! 3589: else goto finish; /* error */ \
! 3590: }
! 3591: #else
! 3592: #define MATCH_AND_RETURN_CHECK \
! 3593: r = match_at(reg, str, end, s, prev, &msa);\
! 3594: if (r != ONIG_MISMATCH) {\
! 3595: if (r >= 0) {\
! 3596: goto match;\
! 3597: }\
! 3598: else goto finish; /* error */ \
! 3599: }
! 3600: #endif
! 3601:
! 3602: /* anchor optimize: resume search range */
! 3603: if (reg->anchor != 0 && str < end) {
! 3604: UChar *min_semi_end, *max_semi_end;
! 3605:
! 3606: if (reg->anchor & ANCHOR_BEGIN_POSITION) {
! 3607: /* search start-position only */
! 3608: begin_position:
! 3609: if (range > start)
! 3610: range = start + 1;
! 3611: else
! 3612: range = start;
! 3613: }
! 3614: else if (reg->anchor & ANCHOR_BEGIN_BUF) {
! 3615: /* search str-position only */
! 3616: if (range > start) {
! 3617: if (start != str) goto mismatch_no_msa;
! 3618: range = str + 1;
! 3619: }
! 3620: else {
! 3621: if (range <= str) {
! 3622: start = str;
! 3623: range = str;
! 3624: }
! 3625: else
! 3626: goto mismatch_no_msa;
! 3627: }
! 3628: }
! 3629: else if (reg->anchor & ANCHOR_END_BUF) {
! 3630: min_semi_end = max_semi_end = (UChar* )end;
! 3631:
! 3632: end_buf:
! 3633: if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
! 3634: goto mismatch_no_msa;
! 3635:
! 3636: if (range > start) {
! 3637: if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
! 3638: start = min_semi_end - reg->anchor_dmax;
! 3639: if (start < end)
! 3640: start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
! 3641: else { /* match with empty at end */
! 3642: start = onigenc_get_prev_char_head(reg->enc, str, end);
! 3643: }
! 3644: }
! 3645: if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
! 3646: range = max_semi_end - reg->anchor_dmin + 1;
! 3647: }
! 3648:
! 3649: if (start >= range) goto mismatch_no_msa;
! 3650: }
! 3651: else {
! 3652: if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
! 3653: range = min_semi_end - reg->anchor_dmax;
! 3654: }
! 3655: if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
! 3656: start = max_semi_end - reg->anchor_dmin;
! 3657: start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
! 3658: }
! 3659: if (range > start) goto mismatch_no_msa;
! 3660: }
! 3661: }
! 3662: else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
! 3663: UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
! 3664:
! 3665: max_semi_end = (UChar* )end;
! 3666: if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
! 3667: min_semi_end = pre_end;
! 3668:
! 3669: #ifdef USE_CRNL_AS_LINE_TERMINATOR
! 3670: pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
! 3671: if (IS_NOT_NULL(pre_end) &&
! 3672: ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
! 3673: min_semi_end = pre_end;
! 3674: }
! 3675: #endif
! 3676: if (min_semi_end > str && start <= min_semi_end) {
! 3677: goto end_buf;
! 3678: }
! 3679: }
! 3680: else {
! 3681: min_semi_end = (UChar* )end;
! 3682: goto end_buf;
! 3683: }
! 3684: }
! 3685: else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
! 3686: goto begin_position;
! 3687: }
! 3688: }
! 3689: else if (str == end) { /* empty string */
! 3690: static const UChar* address_for_empty_string = (UChar* )"";
! 3691:
! 3692: #ifdef ONIG_DEBUG_SEARCH
! 3693: fprintf(stderr, "onig_search: empty string.\n");
! 3694: #endif
! 3695:
! 3696: if (reg->threshold_len == 0) {
! 3697: start = end = str = address_for_empty_string;
! 3698: s = (UChar* )start;
! 3699: prev = (UChar* )NULL;
! 3700:
! 3701: MATCH_ARG_INIT(msa, option, region, start);
! 3702: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 3703: msa.state_check_buff = (void* )0;
! 3704: msa.state_check_buff_size = 0;
! 3705: #endif
! 3706: MATCH_AND_RETURN_CHECK;
! 3707: goto mismatch;
! 3708: }
! 3709: goto mismatch_no_msa;
! 3710: }
! 3711:
! 3712: #ifdef ONIG_DEBUG_SEARCH
! 3713: fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
! 3714: (int )(end - str), (int )(start - str), (int )(range - str));
! 3715: #endif
! 3716:
! 3717: MATCH_ARG_INIT(msa, option, region, orig_start);
! 3718: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 3719: {
! 3720: int offset = (MIN(start, range) - str);
! 3721: STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
! 3722: }
! 3723: #endif
! 3724:
! 3725: s = (UChar* )start;
! 3726: if (range > start) { /* forward search */
! 3727: if (s > str)
! 3728: prev = onigenc_get_prev_char_head(reg->enc, str, s);
! 3729: else
! 3730: prev = (UChar* )NULL;
! 3731:
! 3732: if (reg->optimize != ONIG_OPTIMIZE_NONE) {
! 3733: UChar *sch_range, *low, *high, *low_prev;
! 3734:
! 3735: sch_range = (UChar* )range;
! 3736: if (reg->dmax != 0) {
! 3737: if (reg->dmax == ONIG_INFINITE_DISTANCE)
! 3738: sch_range = (UChar* )end;
! 3739: else {
! 3740: sch_range += reg->dmax;
! 3741: if (sch_range > end) sch_range = (UChar* )end;
! 3742: }
! 3743: }
! 3744:
! 3745: if ((end - start) < reg->threshold_len)
! 3746: goto mismatch;
! 3747:
! 3748: if (reg->dmax != ONIG_INFINITE_DISTANCE) {
! 3749: do {
! 3750: if (! forward_search_range(reg, str, end, s, sch_range,
! 3751: &low, &high, &low_prev)) goto mismatch;
! 3752: if (s < low) {
! 3753: s = low;
! 3754: prev = low_prev;
! 3755: }
! 3756: while (s <= high) {
! 3757: MATCH_AND_RETURN_CHECK;
! 3758: prev = s;
! 3759: s += enc_len(reg->enc, s);
! 3760: }
! 3761: } while (s < range);
! 3762: goto mismatch;
! 3763: }
! 3764: else { /* check only. */
! 3765: if (! forward_search_range(reg, str, end, s, sch_range,
! 3766: &low, &high, (UChar** )NULL)) goto mismatch;
! 3767:
! 3768: if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
! 3769: do {
! 3770: MATCH_AND_RETURN_CHECK;
! 3771: prev = s;
! 3772: s += enc_len(reg->enc, s);
! 3773:
! 3774: while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
! 3775: prev = s;
! 3776: s += enc_len(reg->enc, s);
! 3777: }
! 3778: } while (s < range);
! 3779: goto mismatch;
! 3780: }
! 3781: }
! 3782: }
! 3783:
! 3784: do {
! 3785: MATCH_AND_RETURN_CHECK;
! 3786: prev = s;
! 3787: s += enc_len(reg->enc, s);
! 3788: } while (s < range);
! 3789:
! 3790: if (s == range) { /* because empty match with /$/. */
! 3791: MATCH_AND_RETURN_CHECK;
! 3792: }
! 3793: }
! 3794: else { /* backward search */
! 3795: if (reg->optimize != ONIG_OPTIMIZE_NONE) {
! 3796: UChar *low, *high, *adjrange, *sch_start;
! 3797:
! 3798: if (range < end)
! 3799: adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
! 3800: else
! 3801: adjrange = (UChar* )end;
! 3802:
! 3803: if (reg->dmax != ONIG_INFINITE_DISTANCE &&
! 3804: (end - range) >= reg->threshold_len) {
! 3805: do {
! 3806: sch_start = s + reg->dmax;
! 3807: if (sch_start > end) sch_start = (UChar* )end;
! 3808: if (backward_search_range(reg, str, end, sch_start, range, adjrange,
! 3809: &low, &high) <= 0)
! 3810: goto mismatch;
! 3811:
! 3812: if (s > high)
! 3813: s = high;
! 3814:
! 3815: while (s >= low) {
! 3816: prev = onigenc_get_prev_char_head(reg->enc, str, s);
! 3817: MATCH_AND_RETURN_CHECK;
! 3818: s = prev;
! 3819: }
! 3820: } while (s >= range);
! 3821: goto mismatch;
! 3822: }
! 3823: else { /* check only. */
! 3824: if ((end - range) < reg->threshold_len) goto mismatch;
! 3825:
! 3826: sch_start = s;
! 3827: if (reg->dmax != 0) {
! 3828: if (reg->dmax == ONIG_INFINITE_DISTANCE)
! 3829: sch_start = (UChar* )end;
! 3830: else {
! 3831: sch_start += reg->dmax;
! 3832: if (sch_start > end) sch_start = (UChar* )end;
! 3833: else
! 3834: sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
! 3835: start, sch_start);
! 3836: }
! 3837: }
! 3838: if (backward_search_range(reg, str, end, sch_start, range, adjrange,
! 3839: &low, &high) <= 0) goto mismatch;
! 3840: }
! 3841: }
! 3842:
! 3843: do {
! 3844: prev = onigenc_get_prev_char_head(reg->enc, str, s);
! 3845: MATCH_AND_RETURN_CHECK;
! 3846: s = prev;
! 3847: } while (s >= range);
! 3848: }
! 3849:
! 3850: mismatch:
! 3851: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
! 3852: if (IS_FIND_LONGEST(reg->options)) {
! 3853: if (msa.best_len >= 0) {
! 3854: s = msa.best_s;
! 3855: goto match;
! 3856: }
! 3857: }
! 3858: #endif
! 3859: r = ONIG_MISMATCH;
! 3860:
! 3861: finish:
! 3862: MATCH_ARG_FREE(msa);
! 3863: ONIG_STATE_DEC_THREAD(reg);
! 3864:
! 3865: /* If result is mismatch and no FIND_NOT_EMPTY option,
! 3866: then the region is not setted in match_at(). */
! 3867: if (IS_FIND_NOT_EMPTY(reg->options) && region
! 3868: #ifdef USE_POSIX_REGION_OPTION
! 3869: && !IS_POSIX_REGION(option)
! 3870: #endif
! 3871: ) {
! 3872: onig_region_clear(region);
! 3873: }
! 3874:
! 3875: #ifdef ONIG_DEBUG
! 3876: if (r != ONIG_MISMATCH)
! 3877: fprintf(stderr, "onig_search: error %d\n", r);
! 3878: #endif
! 3879: return r;
! 3880:
! 3881: mismatch_no_msa:
! 3882: r = ONIG_MISMATCH;
! 3883: finish_no_msa:
! 3884: ONIG_STATE_DEC_THREAD(reg);
! 3885: #ifdef ONIG_DEBUG
! 3886: if (r != ONIG_MISMATCH)
! 3887: fprintf(stderr, "onig_search: error %d\n", r);
! 3888: #endif
! 3889: return r;
! 3890:
! 3891: match:
! 3892: ONIG_STATE_DEC_THREAD(reg);
! 3893: MATCH_ARG_FREE(msa);
! 3894: return s - str;
! 3895: }
! 3896:
! 3897: extern OnigEncoding
! 3898: onig_get_encoding(regex_t* reg)
! 3899: {
! 3900: return reg->enc;
! 3901: }
! 3902:
! 3903: extern OnigOptionType
! 3904: onig_get_options(regex_t* reg)
! 3905: {
! 3906: return reg->options;
! 3907: }
! 3908:
! 3909: extern OnigAmbigType
! 3910: onig_get_ambig_flag(regex_t* reg)
! 3911: {
! 3912: return reg->ambig_flag;
! 3913: }
! 3914:
! 3915: extern OnigSyntaxType*
! 3916: onig_get_syntax(regex_t* reg)
! 3917: {
! 3918: return reg->syntax;
! 3919: }
! 3920:
! 3921: extern int
! 3922: onig_number_of_captures(regex_t* reg)
! 3923: {
! 3924: return reg->num_mem;
! 3925: }
! 3926:
! 3927: extern int
! 3928: onig_number_of_capture_histories(regex_t* reg)
! 3929: {
! 3930: #ifdef USE_CAPTURE_HISTORY
! 3931: int i, n;
! 3932:
! 3933: n = 0;
! 3934: for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
! 3935: if (BIT_STATUS_AT(reg->capture_history, i) != 0)
! 3936: n++;
! 3937: }
! 3938: return n;
! 3939: #else
! 3940: return 0;
! 3941: #endif
! 3942: }
! 3943:
! 3944: extern void
! 3945: onig_copy_encoding(OnigEncoding to, OnigEncoding from)
! 3946: {
! 3947: *to = *from;
! 3948: }
! 3949:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>