Annotation of embedaddon/php/ext/mbstring/oniguruma/regexec.c, revision 1.1.1.1
1.1 misho 1: /**********************************************************************
2: regexec.c - Oniguruma (regular expression library)
3: **********************************************************************/
4: /*-
5: * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include "regint.h"
31:
32: #ifdef USE_CRNL_AS_LINE_TERMINATOR
33: #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
34: (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
35: ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
36: #endif
37:
38: #ifdef USE_CAPTURE_HISTORY
39: static void history_tree_free(OnigCaptureTreeNode* node);
40:
41: static void
42: history_tree_clear(OnigCaptureTreeNode* node)
43: {
44: int i;
45:
46: if (IS_NOT_NULL(node)) {
47: for (i = 0; i < node->num_childs; i++) {
48: if (IS_NOT_NULL(node->childs[i])) {
49: history_tree_free(node->childs[i]);
50: }
51: }
52: for (i = 0; i < node->allocated; i++) {
53: node->childs[i] = (OnigCaptureTreeNode* )0;
54: }
55: node->num_childs = 0;
56: node->beg = ONIG_REGION_NOTPOS;
57: node->end = ONIG_REGION_NOTPOS;
58: node->group = -1;
59: }
60: }
61:
62: static void
63: history_tree_free(OnigCaptureTreeNode* node)
64: {
65: history_tree_clear(node);
66: xfree(node);
67: }
68:
69: static void
70: history_root_free(OnigRegion* r)
71: {
72: if (IS_NOT_NULL(r->history_root)) {
73: history_tree_free(r->history_root);
74: r->history_root = (OnigCaptureTreeNode* )0;
75: }
76: }
77:
78: static OnigCaptureTreeNode*
79: history_node_new(void)
80: {
81: OnigCaptureTreeNode* node;
82:
83: node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
84: CHECK_NULL_RETURN(node);
85: node->childs = (OnigCaptureTreeNode** )0;
86: node->allocated = 0;
87: node->num_childs = 0;
88: node->group = -1;
89: node->beg = ONIG_REGION_NOTPOS;
90: node->end = ONIG_REGION_NOTPOS;
91:
92: return node;
93: }
94:
95: static int
96: history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
97: {
98: #define HISTORY_TREE_INIT_ALLOC_SIZE 8
99:
100: if (parent->num_childs >= parent->allocated) {
101: int n, i;
102:
103: if (IS_NULL(parent->childs)) {
104: n = HISTORY_TREE_INIT_ALLOC_SIZE;
105: parent->childs =
106: (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
107: }
108: else {
109: n = parent->allocated * 2;
110: parent->childs =
111: (OnigCaptureTreeNode** )xrealloc(parent->childs,
112: sizeof(OnigCaptureTreeNode*) * n);
113: }
114: CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
115: for (i = parent->allocated; i < n; i++) {
116: parent->childs[i] = (OnigCaptureTreeNode* )0;
117: }
118: parent->allocated = n;
119: }
120:
121: parent->childs[parent->num_childs] = child;
122: parent->num_childs++;
123: return 0;
124: }
125:
126: static OnigCaptureTreeNode*
127: history_tree_clone(OnigCaptureTreeNode* node)
128: {
129: int i;
130: OnigCaptureTreeNode *clone, *child;
131:
132: clone = history_node_new();
133: CHECK_NULL_RETURN(clone);
134:
135: clone->beg = node->beg;
136: clone->end = node->end;
137: for (i = 0; i < node->num_childs; i++) {
138: child = history_tree_clone(node->childs[i]);
139: if (IS_NULL(child)) {
140: history_tree_free(clone);
141: return (OnigCaptureTreeNode* )0;
142: }
143: history_tree_add_child(clone, child);
144: }
145:
146: return clone;
147: }
148:
149: extern OnigCaptureTreeNode*
150: onig_get_capture_tree(OnigRegion* region)
151: {
152: return region->history_root;
153: }
154: #endif /* USE_CAPTURE_HISTORY */
155:
156: extern void
157: onig_region_clear(OnigRegion* region)
158: {
159: int i;
160:
161: for (i = 0; i < region->num_regs; i++) {
162: region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
163: }
164: #ifdef USE_CAPTURE_HISTORY
165: history_root_free(region);
166: #endif
167: }
168:
169: extern int
170: onig_region_resize(OnigRegion* region, int n)
171: {
172: region->num_regs = n;
173:
174: if (n < ONIG_NREGION)
175: n = ONIG_NREGION;
176:
177: if (region->allocated == 0) {
178: region->beg = (int* )xmalloc(n * sizeof(int));
179: region->end = (int* )xmalloc(n * sizeof(int));
180:
181: if (region->beg == 0 || region->end == 0)
182: return ONIGERR_MEMORY;
183:
184: region->allocated = n;
185: }
186: else if (region->allocated < n) {
187: region->beg = (int* )xrealloc(region->beg, n * sizeof(int));
188: region->end = (int* )xrealloc(region->end, n * sizeof(int));
189:
190: if (region->beg == 0 || region->end == 0)
191: return ONIGERR_MEMORY;
192:
193: region->allocated = n;
194: }
195:
196: return 0;
197: }
198:
199: extern int
200: onig_region_resize_clear(OnigRegion* region, int n)
201: {
202: int r;
203:
204: r = onig_region_resize(region, n);
205: if (r != 0) return r;
206: onig_region_clear(region);
207: return 0;
208: }
209:
210: extern int
211: onig_region_set(OnigRegion* region, int at, int beg, int end)
212: {
213: if (at < 0) return ONIGERR_INVALID_ARGUMENT;
214:
215: if (at >= region->allocated) {
216: int r = onig_region_resize(region, at + 1);
217: if (r < 0) return r;
218: }
219:
220: region->beg[at] = beg;
221: region->end[at] = end;
222: return 0;
223: }
224:
225: extern void
226: onig_region_init(OnigRegion* region)
227: {
228: region->num_regs = 0;
229: region->allocated = 0;
230: region->beg = (int* )0;
231: region->end = (int* )0;
232: region->history_root = (OnigCaptureTreeNode* )0;
233: }
234:
235: extern OnigRegion*
236: onig_region_new(void)
237: {
238: OnigRegion* r;
239:
240: r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
241: onig_region_init(r);
242: return r;
243: }
244:
245: extern void
246: onig_region_free(OnigRegion* r, int free_self)
247: {
248: if (r) {
249: if (r->allocated > 0) {
250: if (r->beg) xfree(r->beg);
251: if (r->end) xfree(r->end);
252: r->allocated = 0;
253: }
254: #ifdef USE_CAPTURE_HISTORY
255: history_root_free(r);
256: #endif
257: if (free_self) xfree(r);
258: }
259: }
260:
261: extern void
262: onig_region_copy(OnigRegion* to, OnigRegion* from)
263: {
264: #define RREGC_SIZE (sizeof(int) * from->num_regs)
265: int i;
266:
267: if (to == from) return;
268:
269: if (to->allocated == 0) {
270: if (from->num_regs > 0) {
271: to->beg = (int* )xmalloc(RREGC_SIZE);
272: to->end = (int* )xmalloc(RREGC_SIZE);
273: to->allocated = from->num_regs;
274: }
275: }
276: else if (to->allocated < from->num_regs) {
277: to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
278: to->end = (int* )xrealloc(to->end, RREGC_SIZE);
279: to->allocated = from->num_regs;
280: }
281:
282: for (i = 0; i < from->num_regs; i++) {
283: to->beg[i] = from->beg[i];
284: to->end[i] = from->end[i];
285: }
286: to->num_regs = from->num_regs;
287:
288: #ifdef USE_CAPTURE_HISTORY
289: history_root_free(to);
290:
291: if (IS_NOT_NULL(from->history_root)) {
292: to->history_root = history_tree_clone(from->history_root);
293: }
294: #endif
295: }
296:
297:
298: /** stack **/
299: #define INVALID_STACK_INDEX -1
300: typedef long StackIndex;
301:
302: typedef struct _StackType {
303: unsigned int type;
304: union {
305: struct {
306: UChar *pcode; /* byte code position */
307: UChar *pstr; /* string position */
308: UChar *pstr_prev; /* previous char position of pstr */
309: #ifdef USE_COMBINATION_EXPLOSION_CHECK
310: unsigned int state_check;
311: #endif
312: } state;
313: struct {
314: int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
315: UChar *pcode; /* byte code position (head of repeated target) */
316: int num; /* repeat id */
317: } repeat;
318: struct {
319: StackIndex si; /* index of stack */
320: } repeat_inc;
321: struct {
322: int num; /* memory num */
323: UChar *pstr; /* start/end position */
324: /* Following information is setted, if this stack type is MEM-START */
325: StackIndex start; /* prev. info (for backtrack "(...)*" ) */
326: StackIndex end; /* prev. info (for backtrack "(...)*" ) */
327: } mem;
328: struct {
329: int num; /* null check id */
330: UChar *pstr; /* start position */
331: } null_check;
332: #ifdef USE_SUBEXP_CALL
333: struct {
334: UChar *ret_addr; /* byte code position */
335: int num; /* null check id */
336: UChar *pstr; /* string position */
337: } call_frame;
338: #endif
339: } u;
340: } StackType;
341:
342: /* stack type */
343: /* used by normal-POP */
344: #define STK_ALT 0x0001
345: #define STK_LOOK_BEHIND_NOT 0x0002
346: #define STK_POS_NOT 0x0003
347: /* handled by normal-POP */
348: #define STK_MEM_START 0x0100
349: #define STK_MEM_END 0x8200
350: #define STK_REPEAT_INC 0x0300
351: #define STK_STATE_CHECK_MARK 0x1000
352: /* avoided by normal-POP */
353: #define STK_NULL_CHECK_START 0x3000
354: #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
355: #define STK_MEM_END_MARK 0x8400
356: #define STK_POS 0x0500 /* used when POP-POS */
357: #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
358: #define STK_REPEAT 0x0700
359: #define STK_CALL_FRAME 0x0800
360: #define STK_RETURN 0x0900
361: #define STK_VOID 0x0a00 /* for fill a blank */
362:
363: /* stack type check mask */
364: #define STK_MASK_POP_USED 0x00ff
365: #define STK_MASK_TO_VOID_TARGET 0x10ff
366: #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
367:
368: typedef struct {
369: void* stack_p;
370: int stack_n;
371: OnigOptionType options;
372: OnigRegion* region;
373: const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
374: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
375: int best_len; /* for ONIG_OPTION_FIND_LONGEST */
376: UChar* best_s;
377: #endif
378: #ifdef USE_COMBINATION_EXPLOSION_CHECK
379: void* state_check_buff;
380: int state_check_buff_size;
381: #endif
382: } MatchArg;
383:
384: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
385: #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
386: (msa).stack_p = (void* )0;\
387: (msa).options = (arg_option);\
388: (msa).region = (arg_region);\
389: (msa).start = (arg_start);\
390: (msa).best_len = ONIG_MISMATCH;\
391: } while (0)
392: #else
393: #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
394: (msa).stack_p = (void* )0;\
395: (msa).options = (arg_option);\
396: (msa).region = (arg_region);\
397: (msa).start = (arg_start);\
398: } while (0)
399: #endif
400:
401: #ifdef USE_COMBINATION_EXPLOSION_CHECK
402:
403: #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
404:
405: #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
406: if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
407: unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
408: offset = ((offset) * (state_num)) >> 3;\
409: if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
410: if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
411: (msa).state_check_buff = (void* )xmalloc(size);\
412: else \
413: (msa).state_check_buff = (void* )xalloca(size);\
414: xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
415: (size_t )(size - (offset))); \
416: (msa).state_check_buff_size = size;\
417: }\
418: else {\
419: (msa).state_check_buff = (void* )0;\
420: (msa).state_check_buff_size = 0;\
421: }\
422: }\
423: else {\
424: (msa).state_check_buff = (void* )0;\
425: (msa).state_check_buff_size = 0;\
426: }\
427: } while (0)
428:
429: #define MATCH_ARG_FREE(msa) do {\
430: if ((msa).stack_p) xfree((msa).stack_p);\
431: if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
432: if ((msa).state_check_buff) xfree((msa).state_check_buff);\
433: }\
434: } while (0);
435: #else
436: #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
437: #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
438: #endif
439:
440:
441:
442: #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
443: if (msa->stack_p) {\
444: alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
445: stk_alloc = (StackType* )(msa->stack_p);\
446: stk_base = stk_alloc;\
447: stk = stk_base;\
448: stk_end = stk_base + msa->stack_n;\
449: }\
450: else {\
451: alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
452: + sizeof(StackType) * (stack_num));\
453: stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
454: stk_base = stk_alloc;\
455: stk = stk_base;\
456: stk_end = stk_base + (stack_num);\
457: }\
458: } while(0)
459:
460: #define STACK_SAVE do{\
461: if (stk_base != stk_alloc) {\
462: msa->stack_p = stk_base;\
463: msa->stack_n = stk_end - stk_base;\
464: };\
465: } while(0)
466:
467: static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
468:
469: extern unsigned int
470: onig_get_match_stack_limit_size(void)
471: {
472: return MatchStackLimitSize;
473: }
474:
475: extern int
476: onig_set_match_stack_limit_size(unsigned int size)
477: {
478: MatchStackLimitSize = size;
479: return 0;
480: }
481:
482: static int
483: stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
484: StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
485: {
486: unsigned int n;
487: StackType *x, *stk_base, *stk_end, *stk;
488:
489: stk_base = *arg_stk_base;
490: stk_end = *arg_stk_end;
491: stk = *arg_stk;
492:
493: n = stk_end - stk_base;
494: if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
495: x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
496: if (IS_NULL(x)) {
497: STACK_SAVE;
498: return ONIGERR_MEMORY;
499: }
500: xmemcpy(x, stk_base, n * sizeof(StackType));
501: n *= 2;
502: }
503: else {
504: n *= 2;
505: if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
506: if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
507: return ONIGERR_MATCH_STACK_LIMIT_OVER;
508: else
509: n = MatchStackLimitSize;
510: }
511: x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
512: if (IS_NULL(x)) {
513: STACK_SAVE;
514: return ONIGERR_MEMORY;
515: }
516: }
517: *arg_stk = x + (stk - stk_base);
518: *arg_stk_base = x;
519: *arg_stk_end = x + n;
520: return 0;
521: }
522:
523: #define STACK_ENSURE(n) do {\
524: if (stk_end - stk < (n)) {\
525: int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
526: if (r != 0) { STACK_SAVE; return r; } \
527: }\
528: } while(0)
529:
530: #define STACK_AT(index) (stk_base + (index))
531: #define GET_STACK_INDEX(stk) ((stk) - stk_base)
532:
533: #define STACK_PUSH_TYPE(stack_type) do {\
534: STACK_ENSURE(1);\
535: stk->type = (stack_type);\
536: STACK_INC;\
537: } while(0)
538:
539: #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
540:
541: #ifdef USE_COMBINATION_EXPLOSION_CHECK
542: #define STATE_CHECK_POS(s,snum) \
543: (((s) - str) * num_comb_exp_check + ((snum) - 1))
544: #define STATE_CHECK_VAL(v,snum) do {\
545: if (state_check_buff != NULL) {\
546: int x = STATE_CHECK_POS(s,snum);\
547: (v) = state_check_buff[x/8] & (1<<(x%8));\
548: }\
549: else (v) = 0;\
550: } while(0)
551:
552:
553: #define ELSE_IF_STATE_CHECK_MARK(stk) \
554: else if ((stk)->type == STK_STATE_CHECK_MARK) { \
555: int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
556: state_check_buff[x/8] |= (1<<(x%8)); \
557: }
558:
559: #define STACK_PUSH(stack_type,pat,s,sprev) do {\
560: STACK_ENSURE(1);\
561: stk->type = (stack_type);\
562: stk->u.state.pcode = (pat);\
563: stk->u.state.pstr = (s);\
564: stk->u.state.pstr_prev = (sprev);\
565: stk->u.state.state_check = 0;\
566: STACK_INC;\
567: } while(0)
568:
569: #define STACK_PUSH_ENSURED(stack_type,pat) do {\
570: stk->type = (stack_type);\
571: stk->u.state.pcode = (pat);\
572: stk->u.state.state_check = 0;\
573: STACK_INC;\
574: } while(0)
575:
576: #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
577: STACK_ENSURE(1);\
578: stk->type = STK_ALT;\
579: stk->u.state.pcode = (pat);\
580: stk->u.state.pstr = (s);\
581: stk->u.state.pstr_prev = (sprev);\
582: stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
583: STACK_INC;\
584: } while(0)
585:
586: #define STACK_PUSH_STATE_CHECK(s,snum) do {\
587: if (state_check_buff != NULL) {\
588: STACK_ENSURE(1);\
589: stk->type = STK_STATE_CHECK_MARK;\
590: stk->u.state.pstr = (s);\
591: stk->u.state.state_check = (snum);\
592: STACK_INC;\
593: }\
594: } while(0)
595:
596: #else /* USE_COMBINATION_EXPLOSION_CHECK */
597:
598: #define ELSE_IF_STATE_CHECK_MARK(stk)
599:
600: #define STACK_PUSH(stack_type,pat,s,sprev) do {\
601: STACK_ENSURE(1);\
602: stk->type = (stack_type);\
603: stk->u.state.pcode = (pat);\
604: stk->u.state.pstr = (s);\
605: stk->u.state.pstr_prev = (sprev);\
606: STACK_INC;\
607: } while(0)
608:
609: #define STACK_PUSH_ENSURED(stack_type,pat) do {\
610: stk->type = (stack_type);\
611: stk->u.state.pcode = (pat);\
612: STACK_INC;\
613: } while(0)
614: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
615:
616: #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
617: #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
618: #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
619: #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
620: #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
621: STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
622:
623: #define STACK_PUSH_REPEAT(id, pat) do {\
624: STACK_ENSURE(1);\
625: stk->type = STK_REPEAT;\
626: stk->u.repeat.num = (id);\
627: stk->u.repeat.pcode = (pat);\
628: stk->u.repeat.count = 0;\
629: STACK_INC;\
630: } while(0)
631:
632: #define STACK_PUSH_REPEAT_INC(sindex) do {\
633: STACK_ENSURE(1);\
634: stk->type = STK_REPEAT_INC;\
635: stk->u.repeat_inc.si = (sindex);\
636: STACK_INC;\
637: } while(0)
638:
639: #define STACK_PUSH_MEM_START(mnum, s) do {\
640: STACK_ENSURE(1);\
641: stk->type = STK_MEM_START;\
642: stk->u.mem.num = (mnum);\
643: stk->u.mem.pstr = (s);\
644: stk->u.mem.start = mem_start_stk[mnum];\
645: stk->u.mem.end = mem_end_stk[mnum];\
646: mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
647: mem_end_stk[mnum] = INVALID_STACK_INDEX;\
648: STACK_INC;\
649: } while(0)
650:
651: #define STACK_PUSH_MEM_END(mnum, s) do {\
652: STACK_ENSURE(1);\
653: stk->type = STK_MEM_END;\
654: stk->u.mem.num = (mnum);\
655: stk->u.mem.pstr = (s);\
656: stk->u.mem.start = mem_start_stk[mnum];\
657: stk->u.mem.end = mem_end_stk[mnum];\
658: mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
659: STACK_INC;\
660: } while(0)
661:
662: #define STACK_PUSH_MEM_END_MARK(mnum) do {\
663: STACK_ENSURE(1);\
664: stk->type = STK_MEM_END_MARK;\
665: stk->u.mem.num = (mnum);\
666: STACK_INC;\
667: } while(0)
668:
669: #define STACK_GET_MEM_START(mnum, k) do {\
670: int level = 0;\
671: k = stk;\
672: while (k > stk_base) {\
673: k--;\
674: if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
675: && k->u.mem.num == (mnum)) {\
676: level++;\
677: }\
678: else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
679: if (level == 0) break;\
680: level--;\
681: }\
682: }\
683: } while (0)
684:
685: #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
686: int level = 0;\
687: while (k < stk) {\
688: if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
689: if (level == 0) (start) = k->u.mem.pstr;\
690: level++;\
691: }\
692: else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
693: level--;\
694: if (level == 0) {\
695: (end) = k->u.mem.pstr;\
696: break;\
697: }\
698: }\
699: k++;\
700: }\
701: } while (0)
702:
703: #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
704: STACK_ENSURE(1);\
705: stk->type = STK_NULL_CHECK_START;\
706: stk->u.null_check.num = (cnum);\
707: stk->u.null_check.pstr = (s);\
708: STACK_INC;\
709: } while(0)
710:
711: #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
712: STACK_ENSURE(1);\
713: stk->type = STK_NULL_CHECK_END;\
714: stk->u.null_check.num = (cnum);\
715: STACK_INC;\
716: } while(0)
717:
718: #define STACK_PUSH_CALL_FRAME(pat) do {\
719: STACK_ENSURE(1);\
720: stk->type = STK_CALL_FRAME;\
721: stk->u.call_frame.ret_addr = (pat);\
722: STACK_INC;\
723: } while(0)
724:
725: #define STACK_PUSH_RETURN do {\
726: STACK_ENSURE(1);\
727: stk->type = STK_RETURN;\
728: STACK_INC;\
729: } while(0)
730:
731:
732: #ifdef ONIG_DEBUG
733: #define STACK_BASE_CHECK(p, at) \
734: if ((p) < stk_base) {\
735: fprintf(stderr, "at %s\n", at);\
736: goto stack_error;\
737: }
738: #else
739: #define STACK_BASE_CHECK(p, at)
740: #endif
741:
742: #define STACK_POP_ONE do {\
743: stk--;\
744: STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
745: } while(0)
746:
747: #define STACK_POP do {\
748: switch (pop_level) {\
749: case STACK_POP_LEVEL_FREE:\
750: while (1) {\
751: stk--;\
752: STACK_BASE_CHECK(stk, "STACK_POP"); \
753: if ((stk->type & STK_MASK_POP_USED) != 0) break;\
754: ELSE_IF_STATE_CHECK_MARK(stk);\
755: }\
756: break;\
757: case STACK_POP_LEVEL_MEM_START:\
758: while (1) {\
759: stk--;\
760: STACK_BASE_CHECK(stk, "STACK_POP 2"); \
761: if ((stk->type & STK_MASK_POP_USED) != 0) break;\
762: else if (stk->type == STK_MEM_START) {\
763: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
764: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
765: }\
766: ELSE_IF_STATE_CHECK_MARK(stk);\
767: }\
768: break;\
769: default:\
770: while (1) {\
771: stk--;\
772: STACK_BASE_CHECK(stk, "STACK_POP 3"); \
773: if ((stk->type & STK_MASK_POP_USED) != 0) break;\
774: else if (stk->type == STK_MEM_START) {\
775: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
776: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
777: }\
778: else if (stk->type == STK_REPEAT_INC) {\
779: STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
780: }\
781: else if (stk->type == STK_MEM_END) {\
782: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
783: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
784: }\
785: ELSE_IF_STATE_CHECK_MARK(stk);\
786: }\
787: break;\
788: }\
789: } while(0)
790:
791: #define STACK_POP_TIL_POS_NOT do {\
792: while (1) {\
793: stk--;\
794: STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
795: if (stk->type == STK_POS_NOT) break;\
796: else if (stk->type == STK_MEM_START) {\
797: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
798: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
799: }\
800: else if (stk->type == STK_REPEAT_INC) {\
801: STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
802: }\
803: else if (stk->type == STK_MEM_END) {\
804: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
805: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
806: }\
807: ELSE_IF_STATE_CHECK_MARK(stk);\
808: }\
809: } while(0)
810:
811: #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
812: while (1) {\
813: stk--;\
814: STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
815: if (stk->type == STK_LOOK_BEHIND_NOT) break;\
816: else if (stk->type == STK_MEM_START) {\
817: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
818: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
819: }\
820: else if (stk->type == STK_REPEAT_INC) {\
821: STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
822: }\
823: else if (stk->type == STK_MEM_END) {\
824: mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
825: mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
826: }\
827: ELSE_IF_STATE_CHECK_MARK(stk);\
828: }\
829: } while(0)
830:
831: #define STACK_POS_END(k) do {\
832: k = stk;\
833: while (1) {\
834: k--;\
835: STACK_BASE_CHECK(k, "STACK_POS_END"); \
836: if (IS_TO_VOID_TARGET(k)) {\
837: k->type = STK_VOID;\
838: }\
839: else if (k->type == STK_POS) {\
840: k->type = STK_VOID;\
841: break;\
842: }\
843: }\
844: } while(0)
845:
846: #define STACK_STOP_BT_END do {\
847: StackType *k = stk;\
848: while (1) {\
849: k--;\
850: STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
851: if (IS_TO_VOID_TARGET(k)) {\
852: k->type = STK_VOID;\
853: }\
854: else if (k->type == STK_STOP_BT) {\
855: k->type = STK_VOID;\
856: break;\
857: }\
858: }\
859: } while(0)
860:
861: #define STACK_NULL_CHECK(isnull,id,s) do {\
862: StackType* k = stk;\
863: while (1) {\
864: k--;\
865: STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
866: if (k->type == STK_NULL_CHECK_START) {\
867: if (k->u.null_check.num == (id)) {\
868: (isnull) = (k->u.null_check.pstr == (s));\
869: break;\
870: }\
871: }\
872: }\
873: } while(0)
874:
875: #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
876: int level = 0;\
877: StackType* k = stk;\
878: while (1) {\
879: k--;\
880: STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
881: if (k->type == STK_NULL_CHECK_START) {\
882: if (k->u.null_check.num == (id)) {\
883: if (level == 0) {\
884: (isnull) = (k->u.null_check.pstr == (s));\
885: break;\
886: }\
887: else level--;\
888: }\
889: }\
890: else if (k->type == STK_NULL_CHECK_END) {\
891: level++;\
892: }\
893: }\
894: } while(0)
895:
896: #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
897: StackType* k = stk;\
898: while (1) {\
899: k--;\
900: STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
901: if (k->type == STK_NULL_CHECK_START) {\
902: if (k->u.null_check.num == (id)) {\
903: if (k->u.null_check.pstr != (s)) {\
904: (isnull) = 0;\
905: break;\
906: }\
907: else {\
908: UChar* endp;\
909: (isnull) = 1;\
910: while (k < stk) {\
911: if (k->type == STK_MEM_START) {\
912: if (k->u.mem.end == INVALID_STACK_INDEX) {\
913: (isnull) = 0; break;\
914: }\
915: if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
916: endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
917: else\
918: endp = (UChar* )k->u.mem.end;\
919: if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
920: (isnull) = 0; break;\
921: }\
922: else if (endp != s) {\
923: (isnull) = -1; /* empty, but position changed */ \
924: }\
925: }\
926: k++;\
927: }\
928: break;\
929: }\
930: }\
931: }\
932: }\
933: } while(0)
934:
935: #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
936: int level = 0;\
937: StackType* k = stk;\
938: while (1) {\
939: k--;\
940: STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
941: if (k->type == STK_NULL_CHECK_START) {\
942: if (k->u.null_check.num == (id)) {\
943: if (level == 0) {\
944: if (k->u.null_check.pstr != (s)) {\
945: (isnull) = 0;\
946: break;\
947: }\
948: else {\
949: UChar* endp;\
950: (isnull) = 1;\
951: while (k < stk) {\
952: if (k->type == STK_MEM_START) {\
953: if (k->u.mem.end == INVALID_STACK_INDEX) {\
954: (isnull) = 0; break;\
955: }\
956: if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
957: endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
958: else\
959: endp = (UChar* )k->u.mem.end;\
960: if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
961: (isnull) = 0; break;\
962: }\
963: else if (endp != s) {\
964: (isnull) = -1; /* empty, but position changed */ \
965: }\
966: }\
967: k++;\
968: }\
969: break;\
970: }\
971: }\
972: else {\
973: level--;\
974: }\
975: }\
976: }\
977: else if (k->type == STK_NULL_CHECK_END) {\
978: if (k->u.null_check.num == (id)) level++;\
979: }\
980: }\
981: } while(0)
982:
983: #define STACK_GET_REPEAT(id, k) do {\
984: int level = 0;\
985: k = stk;\
986: while (1) {\
987: k--;\
988: STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
989: if (k->type == STK_REPEAT) {\
990: if (level == 0) {\
991: if (k->u.repeat.num == (id)) {\
992: break;\
993: }\
994: }\
995: }\
996: else if (k->type == STK_CALL_FRAME) level--;\
997: else if (k->type == STK_RETURN) level++;\
998: }\
999: } while (0)
1000:
1001: #define STACK_RETURN(addr) do {\
1002: int level = 0;\
1003: StackType* k = stk;\
1004: while (1) {\
1005: k--;\
1006: STACK_BASE_CHECK(k, "STACK_RETURN"); \
1007: if (k->type == STK_CALL_FRAME) {\
1008: if (level == 0) {\
1009: (addr) = k->u.call_frame.ret_addr;\
1010: break;\
1011: }\
1012: else level--;\
1013: }\
1014: else if (k->type == STK_RETURN)\
1015: level++;\
1016: }\
1017: } while(0)
1018:
1019:
1020: #define STRING_CMP(s1,s2,len) do {\
1021: while (len-- > 0) {\
1022: if (*s1++ != *s2++) goto fail;\
1023: }\
1024: } while(0)
1025:
1026: #define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
1027: if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
1028: goto fail; \
1029: } while(0)
1030:
1031: static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
1032: UChar* s1, UChar** ps2, int mblen)
1033: {
1034: UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
1035: UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
1036: UChar *p1, *p2, *end, *s2, *end2;
1037: int len1, len2;
1038:
1039: s2 = *ps2;
1040: end = s1 + mblen;
1041: end2 = s2 + mblen;
1042: while (s1 < end) {
1043: len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
1044: len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
1045: if (len1 != len2) return 0;
1046: p1 = buf1;
1047: p2 = buf2;
1048: while (len1-- > 0) {
1049: if (*p1 != *p2) return 0;
1050: p1++;
1051: p2++;
1052: }
1053: }
1054:
1055: *ps2 = s2;
1056: return 1;
1057: }
1058:
1059: #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1060: is_fail = 0;\
1061: while (len-- > 0) {\
1062: if (*s1++ != *s2++) {\
1063: is_fail = 1; break;\
1064: }\
1065: }\
1066: } while(0)
1067:
1068: #define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
1069: if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
1070: is_fail = 1; \
1071: else \
1072: is_fail = 0; \
1073: } while(0)
1074:
1075:
1076: #define ON_STR_BEGIN(s) ((s) == str)
1077: #define ON_STR_END(s) ((s) == end)
1078: #define IS_EMPTY_STR (str == end)
1079:
1080: #define DATA_ENSURE(n) \
1081: if (s + (n) > end) goto fail
1082:
1083: #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1084:
1085: #ifdef USE_CAPTURE_HISTORY
1086: static int
1087: make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
1088: StackType* stk_top, UChar* str, regex_t* reg)
1089: {
1090: int n, r;
1091: OnigCaptureTreeNode* child;
1092: StackType* k = *kp;
1093:
1094: while (k < stk_top) {
1095: if (k->type == STK_MEM_START) {
1096: n = k->u.mem.num;
1097: if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1098: BIT_STATUS_AT(reg->capture_history, n) != 0) {
1099: child = history_node_new();
1100: CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
1101: child->group = n;
1102: child->beg = (int )(k->u.mem.pstr - str);
1103: r = history_tree_add_child(node, child);
1104: if (r != 0) return r;
1105: *kp = (k + 1);
1106: r = make_capture_history_tree(child, kp, stk_top, str, reg);
1107: if (r != 0) return r;
1108:
1109: k = *kp;
1110: child->end = (int )(k->u.mem.pstr - str);
1111: }
1112: }
1113: else if (k->type == STK_MEM_END) {
1114: if (k->u.mem.num == node->group) {
1115: node->end = (int )(k->u.mem.pstr - str);
1116: *kp = k;
1117: return 0;
1118: }
1119: }
1120: k++;
1121: }
1122:
1123: return 1; /* 1: root node ending. */
1124: }
1125: #endif
1126:
1127: #ifdef USE_BACKREF_AT_LEVEL
1128: static int mem_is_in_memp(int mem, int num, UChar* memp)
1129: {
1130: int i;
1131: MemNumType m;
1132:
1133: for (i = 0; i < num; i++) {
1134: GET_MEMNUM_INC(m, memp);
1135: if (mem == (int )m) return 1;
1136: }
1137: return 0;
1138: }
1139:
1140: static int backref_match_at_nested_level(regex_t* reg
1141: , StackType* top, StackType* stk_base
1142: , int ignore_case, int ambig_flag
1143: , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1144: {
1145: UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1146: int level;
1147: StackType* k;
1148:
1149: level = 0;
1150: k = top;
1151: k--;
1152: while (k >= stk_base) {
1153: if (k->type == STK_CALL_FRAME) {
1154: level--;
1155: }
1156: else if (k->type == STK_RETURN) {
1157: level++;
1158: }
1159: else if (level == nest) {
1160: if (k->type == STK_MEM_START) {
1161: if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1162: pstart = k->u.mem.pstr;
1163: if (pend != NULL_UCHARP) {
1164: if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1165: p = pstart;
1166: ss = *s;
1167:
1168: if (ignore_case != 0) {
1169: if (string_cmp_ic(reg->enc, ambig_flag,
1170: pstart, &ss, (int )(pend - pstart)) == 0)
1171: return 0; /* or goto next_mem; */
1172: }
1173: else {
1174: while (p < pend) {
1175: if (*p++ != *ss++) return 0; /* or goto next_mem; */
1176: }
1177: }
1178:
1179: *s = ss;
1180: return 1;
1181: }
1182: }
1183: }
1184: else if (k->type == STK_MEM_END) {
1185: if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1186: pend = k->u.mem.pstr;
1187: }
1188: }
1189: }
1190: k--;
1191: }
1192:
1193: return 0;
1194: }
1195: #endif /* USE_BACKREF_AT_LEVEL */
1196:
1197:
1198: #ifdef RUBY_PLATFORM
1199:
1200: typedef struct {
1201: int state;
1202: regex_t* reg;
1203: MatchArg* msa;
1204: StackType* stk_base;
1205: } TrapEnsureArg;
1206:
1207: static VALUE
1208: trap_ensure(VALUE arg)
1209: {
1210: TrapEnsureArg* ta = (TrapEnsureArg* )arg;
1211:
1212: if (ta->state == 0) { /* trap_exec() is not normal return */
1213: ONIG_STATE_DEC_THREAD(ta->reg);
1214: if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
1215: xfree(ta->stk_base);
1216:
1217: MATCH_ARG_FREE(*(ta->msa));
1218: }
1219:
1220: return Qnil;
1221: }
1222:
1223: static VALUE
1224: trap_exec(VALUE arg)
1225: {
1226: TrapEnsureArg* ta;
1227:
1228: rb_trap_exec();
1229:
1230: ta = (TrapEnsureArg* )arg;
1231: ta->state = 1; /* normal return */
1232: return Qnil;
1233: }
1234:
1235: extern void
1236: onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
1237: {
1238: VALUE arg;
1239: TrapEnsureArg ta;
1240:
1241: ta.state = 0;
1242: ta.reg = reg;
1243: ta.msa = msa;
1244: ta.stk_base = stk_base;
1245: arg = (VALUE )(&ta);
1246: rb_ensure(trap_exec, arg, trap_ensure, arg);
1247: }
1248:
1249: #define CHECK_INTERRUPT_IN_MATCH_AT do {\
1250: if (rb_trap_pending) {\
1251: if (! rb_prohibit_interrupt) {\
1252: onig_exec_trap(reg, msa, stk_base);\
1253: }\
1254: }\
1255: } while (0)
1256: #else
1257: #define CHECK_INTERRUPT_IN_MATCH_AT
1258: #endif /* RUBY_PLATFORM */
1259:
1260: #ifdef ONIG_DEBUG_STATISTICS
1261:
1262: #define USE_TIMEOFDAY
1263:
1264: #ifdef USE_TIMEOFDAY
1265: #ifdef HAVE_SYS_TIME_H
1266: #include <sys/time.h>
1267: #endif
1268: #ifdef HAVE_UNISTD_H
1269: #include <unistd.h>
1270: #endif
1271: static struct timeval ts, te;
1272: #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1273: #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1274: (((te).tv_sec - (ts).tv_sec)*1000000))
1275: #else
1276: #ifdef HAVE_SYS_TIMES_H
1277: #include <sys/times.h>
1278: #endif
1279: static struct tms ts, te;
1280: #define GETTIME(t) times(&(t))
1281: #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1282: #endif
1283:
1284: static int OpCounter[256];
1285: static int OpPrevCounter[256];
1286: static unsigned long OpTime[256];
1287: static int OpCurr = OP_FINISH;
1288: static int OpPrevTarget = OP_FAIL;
1289: static int MaxStackDepth = 0;
1290:
1291: #define STAT_OP_IN(opcode) do {\
1292: if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1293: OpCurr = opcode;\
1294: OpCounter[opcode]++;\
1295: GETTIME(ts);\
1296: } while (0)
1297:
1298: #define STAT_OP_OUT do {\
1299: GETTIME(te);\
1300: OpTime[OpCurr] += TIMEDIFF(te, ts);\
1301: } while (0)
1302:
1303: #ifdef RUBY_PLATFORM
1304:
1305: /*
1306: * :nodoc:
1307: */
1308: static VALUE onig_stat_print(void)
1309: {
1310: onig_print_statistics(stderr);
1311: return Qnil;
1312: }
1313: #endif
1314:
1315: extern void onig_statistics_init(void)
1316: {
1317: int i;
1318: for (i = 0; i < 256; i++) {
1319: OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1320: }
1321: MaxStackDepth = 0;
1322:
1323: #ifdef RUBY_PLATFORM
1324: rb_define_global_function("onig_stat_print", onig_stat_print, 0);
1325: #endif
1326: }
1327:
1328: extern void
1329: onig_print_statistics(FILE* f)
1330: {
1331: int i;
1332: fprintf(f, " count prev time\n");
1333: for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1334: fprintf(f, "%8d: %8d: %10ld: %s\n",
1335: OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1336: }
1337: fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1338: }
1339:
1340: #define STACK_INC do {\
1341: stk++;\
1342: if (stk - stk_base > MaxStackDepth) \
1343: MaxStackDepth = stk - stk_base;\
1344: } while (0)
1345:
1346: #else
1347: #define STACK_INC stk++
1348:
1349: #define STAT_OP_IN(opcode)
1350: #define STAT_OP_OUT
1351: #endif
1352:
1353: extern int
1354: onig_is_in_code_range(const UChar* p, OnigCodePoint code)
1355: {
1356: OnigCodePoint n, *data;
1357: OnigCodePoint low, high, x;
1358:
1359: GET_CODE_POINT(n, p);
1360: data = (OnigCodePoint* )p;
1361: data++;
1362:
1363: for (low = 0, high = n; low < high; ) {
1364: x = (low + high) >> 1;
1365: if (code > data[x * 2 + 1])
1366: low = x + 1;
1367: else
1368: high = x;
1369: }
1370:
1371: return ((low < n && code >= data[low * 2]) ? 1 : 0);
1372: }
1373:
1374: static int
1375: is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
1376: {
1377: int found;
1378:
1379: if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
1380: if (IS_NULL(cc->mbuf)) {
1381: found = 0;
1382: }
1383: else {
1384: found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
1385: }
1386: }
1387: else {
1388: found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
1389: }
1390:
1391: if (IS_CCLASS_NOT(cc))
1392: return !found;
1393: else
1394: return found;
1395: }
1396:
1397: extern int
1398: onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
1399: {
1400: int len;
1401:
1402: if (ONIGENC_MBC_MINLEN(enc) > 1) {
1403: len = 2;
1404: }
1405: else {
1406: len = ONIGENC_CODE_TO_MBCLEN(enc, code);
1407: }
1408: return is_code_in_cc(len, code, cc);
1409: }
1410:
1411:
1412: /* matching region of POSIX API */
1413: typedef int regoff_t;
1414:
1415: typedef struct {
1416: regoff_t rm_so;
1417: regoff_t rm_eo;
1418: } posix_regmatch_t;
1419:
1420: /* match data(str - end) from position (sstart). */
1421: /* if sstart == str then set sprev to NULL. */
1422: static int
1423: match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
1424: UChar* sprev, MatchArg* msa)
1425: {
1426: static UChar FinishCode[] = { OP_FINISH };
1427:
1428: int i, n, num_mem, best_len, pop_level;
1429: LengthType tlen, tlen2;
1430: MemNumType mem;
1431: RelAddrType addr;
1432: OnigOptionType option = reg->options;
1433: OnigEncoding encode = reg->enc;
1434: OnigAmbigType ambig_flag = reg->ambig_flag;
1435: UChar *s, *q, *sbegin;
1436: UChar *p = reg->p;
1437: char *alloca_base;
1438: StackType *stk_alloc, *stk_base, *stk, *stk_end;
1439: StackType *stkp; /* used as any purpose. */
1440: StackIndex si;
1441: StackIndex *repeat_stk;
1442: StackIndex *mem_start_stk, *mem_end_stk;
1443: #ifdef USE_COMBINATION_EXPLOSION_CHECK
1444: int scv;
1445: unsigned char* state_check_buff = msa->state_check_buff;
1446: int num_comb_exp_check = reg->num_comb_exp_check;
1447: #endif
1448: n = reg->num_repeat + reg->num_mem * 2;
1449:
1450: STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1451: pop_level = reg->stack_pop_level;
1452: num_mem = reg->num_mem;
1453: repeat_stk = (StackIndex* )alloca_base;
1454:
1455: mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
1456: mem_end_stk = mem_start_stk + num_mem;
1457: mem_start_stk--; /* for index start from 1,
1458: mem_start_stk[1]..mem_start_stk[num_mem] */
1459: mem_end_stk--; /* for index start from 1,
1460: mem_end_stk[1]..mem_end_stk[num_mem] */
1461: for (i = 1; i <= num_mem; i++) {
1462: mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1463: }
1464:
1465: #ifdef ONIG_DEBUG_MATCH
1466: fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1467: (int )str, (int )end, (int )sstart, (int )sprev);
1468: fprintf(stderr, "size: %d, start offset: %d\n",
1469: (int )(end - str), (int )(sstart - str));
1470: #endif
1471:
1472: STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1473: best_len = ONIG_MISMATCH;
1474: s = (UChar* )sstart;
1475: while (1) {
1476: #ifdef ONIG_DEBUG_MATCH
1477: {
1478: UChar *q, *bp, buf[50];
1479: int len;
1480: fprintf(stderr, "%4d> \"", (int )(s - str));
1481: bp = buf;
1482: for (i = 0, q = s; i < 7 && q < end; i++) {
1483: len = enc_len(encode, q);
1484: while (len-- > 0) *bp++ = *q++;
1485: }
1486: if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1487: else { xmemcpy(bp, "\"", 1); bp += 1; }
1488: *bp = 0;
1489: fputs(buf, stderr);
1490: for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1491: onig_print_compiled_byte_code(stderr, p, NULL, encode);
1492: fprintf(stderr, "\n");
1493: }
1494: #endif
1495:
1496: sbegin = s;
1497: switch (*p++) {
1498: case OP_END: STAT_OP_IN(OP_END);
1499: n = s - sstart;
1500: if (n > best_len) {
1501: OnigRegion* region;
1502: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1503: if (IS_FIND_LONGEST(option)) {
1504: if (n > msa->best_len) {
1505: msa->best_len = n;
1506: msa->best_s = (UChar* )sstart;
1507: }
1508: else
1509: goto end_best_len;
1510: }
1511: #endif
1512: best_len = n;
1513: region = msa->region;
1514: if (region) {
1515: #ifdef USE_POSIX_REGION_OPTION
1516: if (IS_POSIX_REGION(msa->options)) {
1517: posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1518:
1519: rmt[0].rm_so = sstart - str;
1520: rmt[0].rm_eo = s - str;
1521: for (i = 1; i <= num_mem; i++) {
1522: if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1523: if (BIT_STATUS_AT(reg->bt_mem_start, i))
1524: rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1525: else
1526: rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
1527:
1528: rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
1529: ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1530: : (UChar* )((void* )mem_end_stk[i])) - str;
1531: }
1532: else {
1533: rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1534: }
1535: }
1536: }
1537: else {
1538: #endif /* USE_POSIX_REGION_OPTION */
1539: region->beg[0] = sstart - str;
1540: region->end[0] = s - str;
1541: for (i = 1; i <= num_mem; i++) {
1542: if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1543: if (BIT_STATUS_AT(reg->bt_mem_start, i))
1544: region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1545: else
1546: region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1547:
1548: region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1549: ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1550: : (UChar* )((void* )mem_end_stk[i])) - str;
1551: }
1552: else {
1553: region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1554: }
1555: }
1556:
1557: #ifdef USE_CAPTURE_HISTORY
1558: if (reg->capture_history != 0) {
1559: int r;
1560: OnigCaptureTreeNode* node;
1561:
1562: if (IS_NULL(region->history_root)) {
1563: region->history_root = node = history_node_new();
1564: CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
1565: }
1566: else {
1567: node = region->history_root;
1568: history_tree_clear(node);
1569: }
1570:
1571: node->group = 0;
1572: node->beg = sstart - str;
1573: node->end = s - str;
1574:
1575: stkp = stk_base;
1576: r = make_capture_history_tree(region->history_root, &stkp,
1577: stk, (UChar* )str, reg);
1578: if (r < 0) {
1579: best_len = r; /* error code */
1580: goto finish;
1581: }
1582: }
1583: #endif /* USE_CAPTURE_HISTORY */
1584: #ifdef USE_POSIX_REGION_OPTION
1585: } /* else IS_POSIX_REGION() */
1586: #endif
1587: } /* if (region) */
1588: } /* n > best_len */
1589:
1590: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1591: end_best_len:
1592: #endif
1593: STAT_OP_OUT;
1594:
1595: if (IS_FIND_CONDITION(option)) {
1596: if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1597: best_len = ONIG_MISMATCH;
1598: goto fail; /* for retry */
1599: }
1600: if (IS_FIND_LONGEST(option) && s < end) {
1601: goto fail; /* for retry */
1602: }
1603: }
1604:
1605: /* default behavior: return first-matching result. */
1606: goto finish;
1607: break;
1608:
1609: case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
1610: #if 0
1611: DATA_ENSURE(1);
1612: if (*p != *s) goto fail;
1613: p++; s++;
1614: #endif
1615: if (*p != *s++) goto fail;
1616: DATA_ENSURE(0);
1617: p++;
1618: STAT_OP_OUT;
1619: break;
1620:
1621: case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
1622: {
1623: int len;
1624: UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
1625:
1626: DATA_ENSURE(1);
1627: ss = s;
1628: sp = p;
1629:
1630: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
1631: DATA_ENSURE(0);
1632: q = lowbuf;
1633: while (len-- > 0) {
1634: if (*p != *q) {
1635: goto fail;
1636: }
1637: p++; q++;
1638: }
1639: }
1640: STAT_OP_OUT;
1641: break;
1642:
1643: case OP_EXACT2: STAT_OP_IN(OP_EXACT2);
1644: DATA_ENSURE(2);
1645: if (*p != *s) goto fail;
1646: p++; s++;
1647: if (*p != *s) goto fail;
1648: sprev = s;
1649: p++; s++;
1650: STAT_OP_OUT;
1651: continue;
1652: break;
1653:
1654: case OP_EXACT3: STAT_OP_IN(OP_EXACT3);
1655: DATA_ENSURE(3);
1656: if (*p != *s) goto fail;
1657: p++; s++;
1658: if (*p != *s) goto fail;
1659: p++; s++;
1660: if (*p != *s) goto fail;
1661: sprev = s;
1662: p++; s++;
1663: STAT_OP_OUT;
1664: continue;
1665: break;
1666:
1667: case OP_EXACT4: STAT_OP_IN(OP_EXACT4);
1668: DATA_ENSURE(4);
1669: if (*p != *s) goto fail;
1670: p++; s++;
1671: if (*p != *s) goto fail;
1672: p++; s++;
1673: if (*p != *s) goto fail;
1674: p++; s++;
1675: if (*p != *s) goto fail;
1676: sprev = s;
1677: p++; s++;
1678: STAT_OP_OUT;
1679: continue;
1680: break;
1681:
1682: case OP_EXACT5: STAT_OP_IN(OP_EXACT5);
1683: DATA_ENSURE(5);
1684: if (*p != *s) goto fail;
1685: p++; s++;
1686: if (*p != *s) goto fail;
1687: p++; s++;
1688: if (*p != *s) goto fail;
1689: p++; s++;
1690: if (*p != *s) goto fail;
1691: p++; s++;
1692: if (*p != *s) goto fail;
1693: sprev = s;
1694: p++; s++;
1695: STAT_OP_OUT;
1696: continue;
1697: break;
1698:
1699: case OP_EXACTN: STAT_OP_IN(OP_EXACTN);
1700: GET_LENGTH_INC(tlen, p);
1701: DATA_ENSURE(tlen);
1702: while (tlen-- > 0) {
1703: if (*p++ != *s++) goto fail;
1704: }
1705: sprev = s - 1;
1706: STAT_OP_OUT;
1707: continue;
1708: break;
1709:
1710: case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
1711: {
1712: int len;
1713: UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
1714:
1715: GET_LENGTH_INC(tlen, p);
1716: endp = p + tlen;
1717:
1718: while (p < endp) {
1719: sprev = s;
1720: DATA_ENSURE(1);
1721: ss = s;
1722: sp = p;
1723:
1724: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
1725: DATA_ENSURE(0);
1726: q = lowbuf;
1727: while (len-- > 0) {
1728: if (*p != *q) {
1729: goto fail;
1730: }
1731: p++; q++;
1732: }
1733: }
1734: }
1735:
1736: STAT_OP_OUT;
1737: continue;
1738: break;
1739:
1740: case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1);
1741: DATA_ENSURE(2);
1742: if (*p != *s) goto fail;
1743: p++; s++;
1744: if (*p != *s) goto fail;
1745: p++; s++;
1746: STAT_OP_OUT;
1747: break;
1748:
1749: case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2);
1750: DATA_ENSURE(4);
1751: if (*p != *s) goto fail;
1752: p++; s++;
1753: if (*p != *s) goto fail;
1754: p++; s++;
1755: sprev = s;
1756: if (*p != *s) goto fail;
1757: p++; s++;
1758: if (*p != *s) goto fail;
1759: p++; s++;
1760: STAT_OP_OUT;
1761: continue;
1762: break;
1763:
1764: case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3);
1765: DATA_ENSURE(6);
1766: if (*p != *s) goto fail;
1767: p++; s++;
1768: if (*p != *s) goto fail;
1769: p++; s++;
1770: if (*p != *s) goto fail;
1771: p++; s++;
1772: if (*p != *s) goto fail;
1773: p++; s++;
1774: sprev = s;
1775: if (*p != *s) goto fail;
1776: p++; s++;
1777: if (*p != *s) goto fail;
1778: p++; s++;
1779: STAT_OP_OUT;
1780: continue;
1781: break;
1782:
1783: case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N);
1784: GET_LENGTH_INC(tlen, p);
1785: DATA_ENSURE(tlen * 2);
1786: while (tlen-- > 0) {
1787: if (*p != *s) goto fail;
1788: p++; s++;
1789: if (*p != *s) goto fail;
1790: p++; s++;
1791: }
1792: sprev = s - 2;
1793: STAT_OP_OUT;
1794: continue;
1795: break;
1796:
1797: case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N);
1798: GET_LENGTH_INC(tlen, p);
1799: DATA_ENSURE(tlen * 3);
1800: while (tlen-- > 0) {
1801: if (*p != *s) goto fail;
1802: p++; s++;
1803: if (*p != *s) goto fail;
1804: p++; s++;
1805: if (*p != *s) goto fail;
1806: p++; s++;
1807: }
1808: sprev = s - 3;
1809: STAT_OP_OUT;
1810: continue;
1811: break;
1812:
1813: case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN);
1814: GET_LENGTH_INC(tlen, p); /* mb-len */
1815: GET_LENGTH_INC(tlen2, p); /* string len */
1816: tlen2 *= tlen;
1817: DATA_ENSURE(tlen2);
1818: while (tlen2-- > 0) {
1819: if (*p != *s) goto fail;
1820: p++; s++;
1821: }
1822: sprev = s - tlen;
1823: STAT_OP_OUT;
1824: continue;
1825: break;
1826:
1827: case OP_CCLASS: STAT_OP_IN(OP_CCLASS);
1828: DATA_ENSURE(1);
1829: if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1830: p += SIZE_BITSET;
1831: s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1832: STAT_OP_OUT;
1833: break;
1834:
1835: case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
1836: if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1837:
1838: cclass_mb:
1839: GET_LENGTH_INC(tlen, p);
1840: {
1841: OnigCodePoint code;
1842: UChar *ss;
1843: int mb_len;
1844:
1845: DATA_ENSURE(1);
1846: mb_len = enc_len(encode, s);
1847: DATA_ENSURE(mb_len);
1848: ss = s;
1849: s += mb_len;
1850: code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1851:
1852: #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1853: if (! onig_is_in_code_range(p, code)) goto fail;
1854: #else
1855: q = p;
1856: ALIGNMENT_RIGHT(q);
1857: if (! onig_is_in_code_range(q, code)) goto fail;
1858: #endif
1859: }
1860: p += tlen;
1861: STAT_OP_OUT;
1862: break;
1863:
1864: case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
1865: DATA_ENSURE(1);
1866: if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1867: p += SIZE_BITSET;
1868: goto cclass_mb;
1869: }
1870: else {
1871: if (BITSET_AT(((BitSetRef )p), *s) == 0)
1872: goto fail;
1873:
1874: p += SIZE_BITSET;
1875: GET_LENGTH_INC(tlen, p);
1876: p += tlen;
1877: s++;
1878: }
1879: STAT_OP_OUT;
1880: break;
1881:
1882: case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT);
1883: DATA_ENSURE(1);
1884: if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1885: p += SIZE_BITSET;
1886: s += enc_len(encode, s);
1887: STAT_OP_OUT;
1888: break;
1889:
1890: case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
1891: DATA_ENSURE(1);
1892: if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1893: s++;
1894: GET_LENGTH_INC(tlen, p);
1895: p += tlen;
1896: goto cc_mb_not_success;
1897: }
1898:
1899: cclass_mb_not:
1900: GET_LENGTH_INC(tlen, p);
1901: {
1902: OnigCodePoint code;
1903: UChar *ss;
1904: int mb_len = enc_len(encode, s);
1905:
1906: if (s + mb_len > end) {
1907: DATA_ENSURE(1);
1908: s = (UChar* )end;
1909: p += tlen;
1910: goto cc_mb_not_success;
1911: }
1912:
1913: ss = s;
1914: s += mb_len;
1915: code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1916:
1917: #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1918: if (onig_is_in_code_range(p, code)) goto fail;
1919: #else
1920: q = p;
1921: ALIGNMENT_RIGHT(q);
1922: if (onig_is_in_code_range(q, code)) goto fail;
1923: #endif
1924: }
1925: p += tlen;
1926:
1927: cc_mb_not_success:
1928: STAT_OP_OUT;
1929: break;
1930:
1931: case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
1932: DATA_ENSURE(1);
1933: if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1934: p += SIZE_BITSET;
1935: goto cclass_mb_not;
1936: }
1937: else {
1938: if (BITSET_AT(((BitSetRef )p), *s) != 0)
1939: goto fail;
1940:
1941: p += SIZE_BITSET;
1942: GET_LENGTH_INC(tlen, p);
1943: p += tlen;
1944: s++;
1945: }
1946: STAT_OP_OUT;
1947: break;
1948:
1949: case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
1950: {
1951: OnigCodePoint code;
1952: void *node;
1953: int mb_len;
1954: UChar *ss;
1955:
1956: DATA_ENSURE(1);
1957: GET_POINTER_INC(node, p);
1958: mb_len = enc_len(encode, s);
1959: ss = s;
1960: s += mb_len;
1961: DATA_ENSURE(0);
1962: code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1963: if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
1964: }
1965: STAT_OP_OUT;
1966: break;
1967:
1968: case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
1969: DATA_ENSURE(1);
1970: n = enc_len(encode, s);
1971: DATA_ENSURE(n);
1972: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1973: s += n;
1974: STAT_OP_OUT;
1975: break;
1976:
1977: case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
1978: DATA_ENSURE(1);
1979: n = enc_len(encode, s);
1980: DATA_ENSURE(n);
1981: s += n;
1982: STAT_OP_OUT;
1983: break;
1984:
1985: case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
1986: while (s < end) {
1987: STACK_PUSH_ALT(p, s, sprev);
1988: n = enc_len(encode, s);
1989: DATA_ENSURE(n);
1990: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1991: sprev = s;
1992: s += n;
1993: }
1994: STAT_OP_OUT;
1995: break;
1996:
1997: case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
1998: while (s < end) {
1999: STACK_PUSH_ALT(p, s, sprev);
2000: n = enc_len(encode, s);
2001: if (n > 1) {
2002: DATA_ENSURE(n);
2003: sprev = s;
2004: s += n;
2005: }
2006: else {
2007: sprev = s;
2008: s++;
2009: }
2010: }
2011: STAT_OP_OUT;
2012: break;
2013:
2014: case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2015: while (s < end) {
2016: if (*p == *s) {
2017: STACK_PUSH_ALT(p + 1, s, sprev);
2018: }
2019: n = enc_len(encode, s);
2020: DATA_ENSURE(n);
2021: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2022: sprev = s;
2023: s += n;
2024: }
2025: p++;
2026: STAT_OP_OUT;
2027: break;
2028:
2029: case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2030: while (s < end) {
2031: if (*p == *s) {
2032: STACK_PUSH_ALT(p + 1, s, sprev);
2033: }
2034: n = enc_len(encode, s);
2035: if (n >1) {
2036: DATA_ENSURE(n);
2037: sprev = s;
2038: s += n;
2039: }
2040: else {
2041: sprev = s;
2042: s++;
2043: }
2044: }
2045: p++;
2046: STAT_OP_OUT;
2047: break;
2048:
2049: #ifdef USE_COMBINATION_EXPLOSION_CHECK
2050: case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2051: GET_STATE_CHECK_NUM_INC(mem, p);
2052: while (s < end) {
2053: STATE_CHECK_VAL(scv, mem);
2054: if (scv) goto fail;
2055:
2056: STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
2057: n = enc_len(encode, s);
2058: DATA_ENSURE(n);
2059: if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2060: sprev = s;
2061: s += n;
2062: }
2063: STAT_OP_OUT;
2064: break;
2065:
2066: case OP_STATE_CHECK_ANYCHAR_ML_STAR:
2067: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2068:
2069: GET_STATE_CHECK_NUM_INC(mem, p);
2070: while (s < end) {
2071: STATE_CHECK_VAL(scv, mem);
2072: if (scv) goto fail;
2073:
2074: STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
2075: n = enc_len(encode, s);
2076: if (n > 1) {
2077: DATA_ENSURE(n);
2078: sprev = s;
2079: s += n;
2080: }
2081: else {
2082: sprev = s;
2083: s++;
2084: }
2085: }
2086: STAT_OP_OUT;
2087: break;
2088: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2089:
2090: case OP_WORD: STAT_OP_IN(OP_WORD);
2091: DATA_ENSURE(1);
2092: if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2093: goto fail;
2094:
2095: s += enc_len(encode, s);
2096: STAT_OP_OUT;
2097: break;
2098:
2099: case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
2100: DATA_ENSURE(1);
2101: if (ONIGENC_IS_MBC_WORD(encode, s, end))
2102: goto fail;
2103:
2104: s += enc_len(encode, s);
2105: STAT_OP_OUT;
2106: break;
2107:
2108: case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND);
2109: if (ON_STR_BEGIN(s)) {
2110: DATA_ENSURE(1);
2111: if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2112: goto fail;
2113: }
2114: else if (ON_STR_END(s)) {
2115: if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2116: goto fail;
2117: }
2118: else {
2119: if (ONIGENC_IS_MBC_WORD(encode, s, end)
2120: == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2121: goto fail;
2122: }
2123: STAT_OP_OUT;
2124: continue;
2125: break;
2126:
2127: case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
2128: if (ON_STR_BEGIN(s)) {
2129: if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
2130: goto fail;
2131: }
2132: else if (ON_STR_END(s)) {
2133: if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2134: goto fail;
2135: }
2136: else {
2137: if (ONIGENC_IS_MBC_WORD(encode, s, end)
2138: != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2139: goto fail;
2140: }
2141: STAT_OP_OUT;
2142: continue;
2143: break;
2144:
2145: #ifdef USE_WORD_BEGIN_END
2146: case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
2147: if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2148: if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2149: STAT_OP_OUT;
2150: continue;
2151: }
2152: }
2153: goto fail;
2154: break;
2155:
2156: case OP_WORD_END: STAT_OP_IN(OP_WORD_END);
2157: if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2158: if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2159: STAT_OP_OUT;
2160: continue;
2161: }
2162: }
2163: goto fail;
2164: break;
2165: #endif
2166:
2167: case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF);
2168: if (! ON_STR_BEGIN(s)) goto fail;
2169:
2170: STAT_OP_OUT;
2171: continue;
2172: break;
2173:
2174: case OP_END_BUF: STAT_OP_IN(OP_END_BUF);
2175: if (! ON_STR_END(s)) goto fail;
2176:
2177: STAT_OP_OUT;
2178: continue;
2179: break;
2180:
2181: case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE);
2182: if (ON_STR_BEGIN(s)) {
2183: if (IS_NOTBOL(msa->options)) goto fail;
2184: STAT_OP_OUT;
2185: continue;
2186: }
2187: else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2188: STAT_OP_OUT;
2189: continue;
2190: }
2191: goto fail;
2192: break;
2193:
2194: case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
2195: if (ON_STR_END(s)) {
2196: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2197: if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2198: #endif
2199: if (IS_NOTEOL(msa->options)) goto fail;
2200: STAT_OP_OUT;
2201: continue;
2202: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2203: }
2204: #endif
2205: }
2206: else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2207: STAT_OP_OUT;
2208: continue;
2209: }
2210: #ifdef USE_CRNL_AS_LINE_TERMINATOR
2211: else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2212: STAT_OP_OUT;
2213: continue;
2214: }
2215: #endif
2216: goto fail;
2217: break;
2218:
2219: case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
2220: if (ON_STR_END(s)) {
2221: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2222: if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2223: #endif
2224: if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
2225: STAT_OP_OUT;
2226: continue;
2227: #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2228: }
2229: #endif
2230: }
2231: else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2232: ON_STR_END(s + enc_len(encode, s))) {
2233: STAT_OP_OUT;
2234: continue;
2235: }
2236: #ifdef USE_CRNL_AS_LINE_TERMINATOR
2237: else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2238: UChar* ss = s + enc_len(encode, s);
2239: if (ON_STR_END(ss + enc_len(encode, ss))) {
2240: STAT_OP_OUT;
2241: continue;
2242: }
2243: }
2244: #endif
2245: goto fail;
2246: break;
2247:
2248: case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION);
2249: if (s != msa->start)
2250: goto fail;
2251:
2252: STAT_OP_OUT;
2253: continue;
2254: break;
2255:
2256: case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH);
2257: GET_MEMNUM_INC(mem, p);
2258: STACK_PUSH_MEM_START(mem, s);
2259: STAT_OP_OUT;
2260: continue;
2261: break;
2262:
2263: case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
2264: GET_MEMNUM_INC(mem, p);
2265: mem_start_stk[mem] = (StackIndex )((void* )s);
2266: STAT_OP_OUT;
2267: continue;
2268: break;
2269:
2270: case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH);
2271: GET_MEMNUM_INC(mem, p);
2272: STACK_PUSH_MEM_END(mem, s);
2273: STAT_OP_OUT;
2274: continue;
2275: break;
2276:
2277: case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
2278: GET_MEMNUM_INC(mem, p);
2279: mem_end_stk[mem] = (StackIndex )((void* )s);
2280: STAT_OP_OUT;
2281: continue;
2282: break;
2283:
2284: #ifdef USE_SUBEXP_CALL
2285: case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
2286: GET_MEMNUM_INC(mem, p);
2287: STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2288: STACK_PUSH_MEM_END(mem, s);
2289: mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2290: STAT_OP_OUT;
2291: continue;
2292: break;
2293:
2294: case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
2295: GET_MEMNUM_INC(mem, p);
2296: mem_end_stk[mem] = (StackIndex )((void* )s);
2297: STACK_GET_MEM_START(mem, stkp);
2298:
2299: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2300: mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2301: else
2302: mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
2303:
2304: STACK_PUSH_MEM_END_MARK(mem);
2305: STAT_OP_OUT;
2306: continue;
2307: break;
2308: #endif
2309:
2310: case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1);
2311: mem = 1;
2312: goto backref;
2313: break;
2314:
2315: case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2);
2316: mem = 2;
2317: goto backref;
2318: break;
2319:
2320: case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
2321: GET_MEMNUM_INC(mem, p);
2322: backref:
2323: {
2324: int len;
2325: UChar *pstart, *pend;
2326:
2327: /* if you want to remove following line,
2328: you should check in parse and compile time. */
2329: if (mem > num_mem) goto fail;
2330: if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2331: if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2332:
2333: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2334: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2335: else
2336: pstart = (UChar* )((void* )mem_start_stk[mem]);
2337:
2338: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2339: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2340: : (UChar* )((void* )mem_end_stk[mem]));
2341: n = pend - pstart;
2342: DATA_ENSURE(n);
2343: sprev = s;
2344: STRING_CMP(pstart, s, n);
2345: while (sprev + (len = enc_len(encode, sprev)) < s)
2346: sprev += len;
2347:
2348: STAT_OP_OUT;
2349: continue;
2350: }
2351: break;
2352:
2353: case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC);
2354: GET_MEMNUM_INC(mem, p);
2355: {
2356: int len;
2357: UChar *pstart, *pend;
2358:
2359: /* if you want to remove following line,
2360: you should check in parse and compile time. */
2361: if (mem > num_mem) goto fail;
2362: if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2363: if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2364:
2365: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2366: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2367: else
2368: pstart = (UChar* )((void* )mem_start_stk[mem]);
2369:
2370: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2371: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2372: : (UChar* )((void* )mem_end_stk[mem]));
2373: n = pend - pstart;
2374: DATA_ENSURE(n);
2375: sprev = s;
2376: STRING_CMP_IC(ambig_flag, pstart, &s, n);
2377: while (sprev + (len = enc_len(encode, sprev)) < s)
2378: sprev += len;
2379:
2380: STAT_OP_OUT;
2381: continue;
2382: }
2383: break;
2384:
2385: case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI);
2386: {
2387: int len, is_fail;
2388: UChar *pstart, *pend, *swork;
2389:
2390: GET_LENGTH_INC(tlen, p);
2391: for (i = 0; i < tlen; i++) {
2392: GET_MEMNUM_INC(mem, p);
2393:
2394: if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2395: if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2396:
2397: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2398: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2399: else
2400: pstart = (UChar* )((void* )mem_start_stk[mem]);
2401:
2402: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2403: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2404: : (UChar* )((void* )mem_end_stk[mem]));
2405: n = pend - pstart;
2406: DATA_ENSURE(n);
2407: sprev = s;
2408: swork = s;
2409: STRING_CMP_VALUE(pstart, swork, n, is_fail);
2410: if (is_fail) continue;
2411: s = swork;
2412: while (sprev + (len = enc_len(encode, sprev)) < s)
2413: sprev += len;
2414:
2415: p += (SIZE_MEMNUM * (tlen - i - 1));
2416: break; /* success */
2417: }
2418: if (i == tlen) goto fail;
2419: STAT_OP_OUT;
2420: continue;
2421: }
2422: break;
2423:
2424: case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC);
2425: {
2426: int len, is_fail;
2427: UChar *pstart, *pend, *swork;
2428:
2429: GET_LENGTH_INC(tlen, p);
2430: for (i = 0; i < tlen; i++) {
2431: GET_MEMNUM_INC(mem, p);
2432:
2433: if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2434: if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2435:
2436: if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2437: pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2438: else
2439: pstart = (UChar* )((void* )mem_start_stk[mem]);
2440:
2441: pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2442: ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2443: : (UChar* )((void* )mem_end_stk[mem]));
2444: n = pend - pstart;
2445: DATA_ENSURE(n);
2446: sprev = s;
2447: swork = s;
2448: STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
2449: if (is_fail) continue;
2450: s = swork;
2451: while (sprev + (len = enc_len(encode, sprev)) < s)
2452: sprev += len;
2453:
2454: p += (SIZE_MEMNUM * (tlen - i - 1));
2455: break; /* success */
2456: }
2457: if (i == tlen) goto fail;
2458: STAT_OP_OUT;
2459: continue;
2460: }
2461: break;
2462:
2463: #ifdef USE_BACKREF_AT_LEVEL
2464: case OP_BACKREF_AT_LEVEL:
2465: {
2466: int len;
2467: OnigOptionType ic;
2468: LengthType level;
2469:
2470: GET_OPTION_INC(ic, p);
2471: GET_LENGTH_INC(level, p);
2472: GET_LENGTH_INC(tlen, p);
2473:
2474: sprev = s;
2475: if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
2476: , (int )level, (int )tlen, p, &s, end)) {
2477: while (sprev + (len = enc_len(encode, sprev)) < s)
2478: sprev += len;
2479:
2480: p += (SIZE_MEMNUM * tlen);
2481: }
2482: else
2483: goto fail;
2484:
2485: STAT_OP_OUT;
2486: continue;
2487: }
2488:
2489: break;
2490: #endif
2491:
2492: case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
2493: GET_OPTION_INC(option, p);
2494: STACK_PUSH_ALT(p, s, sprev);
2495: p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2496: STAT_OP_OUT;
2497: continue;
2498: break;
2499:
2500: case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
2501: GET_OPTION_INC(option, p);
2502: STAT_OP_OUT;
2503: continue;
2504: break;
2505:
2506: case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START);
2507: GET_MEMNUM_INC(mem, p); /* mem: null check id */
2508: STACK_PUSH_NULL_CHECK_START(mem, s);
2509: STAT_OP_OUT;
2510: continue;
2511: break;
2512:
2513: case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END);
2514: {
2515: int isnull;
2516:
2517: GET_MEMNUM_INC(mem, p); /* mem: null check id */
2518: STACK_NULL_CHECK(isnull, mem, s);
2519: if (isnull) {
2520: #ifdef ONIG_DEBUG_MATCH
2521: fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2522: (int )mem, (int )s);
2523: #endif
2524: null_check_found:
2525: /* empty loop founded, skip next instruction */
2526: switch (*p++) {
2527: case OP_JUMP:
2528: case OP_PUSH:
2529: p += SIZE_RELADDR;
2530: break;
2531: case OP_REPEAT_INC:
2532: case OP_REPEAT_INC_NG:
2533: case OP_REPEAT_INC_SG:
2534: case OP_REPEAT_INC_NG_SG:
2535: p += SIZE_MEMNUM;
2536: break;
2537: default:
2538: goto unexpected_bytecode_error;
2539: break;
2540: }
2541: }
2542: }
2543: STAT_OP_OUT;
2544: continue;
2545: break;
2546:
2547: #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
2548: case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
2549: {
2550: int isnull;
2551:
2552: GET_MEMNUM_INC(mem, p); /* mem: null check id */
2553: STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2554: if (isnull) {
2555: #ifdef ONIG_DEBUG_MATCH
2556: fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2557: (int )mem, (int )s);
2558: #endif
2559: if (isnull == -1) goto fail;
2560: goto null_check_found;
2561: }
2562: }
2563: STAT_OP_OUT;
2564: continue;
2565: break;
2566: #endif
2567:
2568: #ifdef USE_SUBEXP_CALL
2569: case OP_NULL_CHECK_END_MEMST_PUSH:
2570: STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2571: {
2572: int isnull;
2573:
2574: GET_MEMNUM_INC(mem, p); /* mem: null check id */
2575: #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
2576: STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2577: #else
2578: STACK_NULL_CHECK_REC(isnull, mem, s);
2579: #endif
2580: if (isnull) {
2581: #ifdef ONIG_DEBUG_MATCH
2582: fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2583: (int )mem, (int )s);
2584: #endif
2585: if (isnull == -1) goto fail;
2586: goto null_check_found;
2587: }
2588: else {
2589: STACK_PUSH_NULL_CHECK_END(mem);
2590: }
2591: }
2592: STAT_OP_OUT;
2593: continue;
2594: break;
2595: #endif
2596:
2597: case OP_JUMP: STAT_OP_IN(OP_JUMP);
2598: GET_RELADDR_INC(addr, p);
2599: p += addr;
2600: STAT_OP_OUT;
2601: CHECK_INTERRUPT_IN_MATCH_AT;
2602: continue;
2603: break;
2604:
2605: case OP_PUSH: STAT_OP_IN(OP_PUSH);
2606: GET_RELADDR_INC(addr, p);
2607: STACK_PUSH_ALT(p + addr, s, sprev);
2608: STAT_OP_OUT;
2609: continue;
2610: break;
2611:
2612: #ifdef USE_COMBINATION_EXPLOSION_CHECK
2613: case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
2614: GET_STATE_CHECK_NUM_INC(mem, p);
2615: STATE_CHECK_VAL(scv, mem);
2616: if (scv) goto fail;
2617:
2618: GET_RELADDR_INC(addr, p);
2619: STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2620: STAT_OP_OUT;
2621: continue;
2622: break;
2623:
2624: case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2625: GET_STATE_CHECK_NUM_INC(mem, p);
2626: GET_RELADDR_INC(addr, p);
2627: STATE_CHECK_VAL(scv, mem);
2628: if (scv) {
2629: p += addr;
2630: }
2631: else {
2632: STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2633: }
2634: STAT_OP_OUT;
2635: continue;
2636: break;
2637:
2638: case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
2639: GET_STATE_CHECK_NUM_INC(mem, p);
2640: STATE_CHECK_VAL(scv, mem);
2641: if (scv) goto fail;
2642:
2643: STACK_PUSH_STATE_CHECK(s, mem);
2644: STAT_OP_OUT;
2645: continue;
2646: break;
2647: #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2648:
2649: case OP_POP: STAT_OP_IN(OP_POP);
2650: STACK_POP_ONE;
2651: STAT_OP_OUT;
2652: continue;
2653: break;
2654:
2655: case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
2656: GET_RELADDR_INC(addr, p);
2657: if (*p == *s && DATA_ENSURE_CHECK(1)) {
2658: p++;
2659: STACK_PUSH_ALT(p + addr, s, sprev);
2660: STAT_OP_OUT;
2661: continue;
2662: }
2663: p += (addr + 1);
2664: STAT_OP_OUT;
2665: continue;
2666: break;
2667:
2668: case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
2669: GET_RELADDR_INC(addr, p);
2670: if (*p == *s) {
2671: p++;
2672: STACK_PUSH_ALT(p + addr, s, sprev);
2673: STAT_OP_OUT;
2674: continue;
2675: }
2676: p++;
2677: STAT_OP_OUT;
2678: continue;
2679: break;
2680:
2681: case OP_REPEAT: STAT_OP_IN(OP_REPEAT);
2682: {
2683: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2684: GET_RELADDR_INC(addr, p);
2685:
2686: STACK_ENSURE(1);
2687: repeat_stk[mem] = GET_STACK_INDEX(stk);
2688: STACK_PUSH_REPEAT(mem, p);
2689:
2690: if (reg->repeat_range[mem].lower == 0) {
2691: STACK_PUSH_ALT(p + addr, s, sprev);
2692: }
2693: }
2694: STAT_OP_OUT;
2695: continue;
2696: break;
2697:
2698: case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG);
2699: {
2700: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2701: GET_RELADDR_INC(addr, p);
2702:
2703: STACK_ENSURE(1);
2704: repeat_stk[mem] = GET_STACK_INDEX(stk);
2705: STACK_PUSH_REPEAT(mem, p);
2706:
2707: if (reg->repeat_range[mem].lower == 0) {
2708: STACK_PUSH_ALT(p, s, sprev);
2709: p += addr;
2710: }
2711: }
2712: STAT_OP_OUT;
2713: continue;
2714: break;
2715:
2716: case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
2717: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2718: si = repeat_stk[mem];
2719: stkp = STACK_AT(si);
2720:
2721: repeat_inc:
2722: stkp->u.repeat.count++;
2723: if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2724: /* end of repeat. Nothing to do. */
2725: }
2726: else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2727: STACK_PUSH_ALT(p, s, sprev);
2728: p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2729: }
2730: else {
2731: p = stkp->u.repeat.pcode;
2732: }
2733: STACK_PUSH_REPEAT_INC(si);
2734: STAT_OP_OUT;
2735: CHECK_INTERRUPT_IN_MATCH_AT;
2736: continue;
2737: break;
2738:
2739: case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG);
2740: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2741: STACK_GET_REPEAT(mem, stkp);
2742: si = GET_STACK_INDEX(stkp);
2743: goto repeat_inc;
2744: break;
2745:
2746: case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
2747: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2748: si = repeat_stk[mem];
2749: stkp = STACK_AT(si);
2750:
2751: repeat_inc_ng:
2752: stkp->u.repeat.count++;
2753: if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2754: if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2755: UChar* pcode = stkp->u.repeat.pcode;
2756:
2757: STACK_PUSH_REPEAT_INC(si);
2758: STACK_PUSH_ALT(pcode, s, sprev);
2759: }
2760: else {
2761: p = stkp->u.repeat.pcode;
2762: STACK_PUSH_REPEAT_INC(si);
2763: }
2764: }
2765: else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2766: STACK_PUSH_REPEAT_INC(si);
2767: }
2768: STAT_OP_OUT;
2769: CHECK_INTERRUPT_IN_MATCH_AT;
2770: continue;
2771: break;
2772:
2773: case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG);
2774: GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2775: STACK_GET_REPEAT(mem, stkp);
2776: si = GET_STACK_INDEX(stkp);
2777: goto repeat_inc_ng;
2778: break;
2779:
2780: case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
2781: STACK_PUSH_POS(s, sprev);
2782: STAT_OP_OUT;
2783: continue;
2784: break;
2785:
2786: case OP_POP_POS: STAT_OP_IN(OP_POP_POS);
2787: {
2788: STACK_POS_END(stkp);
2789: s = stkp->u.state.pstr;
2790: sprev = stkp->u.state.pstr_prev;
2791: }
2792: STAT_OP_OUT;
2793: continue;
2794: break;
2795:
2796: case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT);
2797: GET_RELADDR_INC(addr, p);
2798: STACK_PUSH_POS_NOT(p + addr, s, sprev);
2799: STAT_OP_OUT;
2800: continue;
2801: break;
2802:
2803: case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS);
2804: STACK_POP_TIL_POS_NOT;
2805: goto fail;
2806: break;
2807:
2808: case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT);
2809: STACK_PUSH_STOP_BT;
2810: STAT_OP_OUT;
2811: continue;
2812: break;
2813:
2814: case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT);
2815: STACK_STOP_BT_END;
2816: STAT_OP_OUT;
2817: continue;
2818: break;
2819:
2820: case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
2821: GET_LENGTH_INC(tlen, p);
2822: s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2823: if (IS_NULL(s)) goto fail;
2824: sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2825: STAT_OP_OUT;
2826: continue;
2827: break;
2828:
2829: case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2830: GET_RELADDR_INC(addr, p);
2831: GET_LENGTH_INC(tlen, p);
2832: q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2833: if (IS_NULL(q)) {
2834: /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2835: If you want to change to fail, replace following line. */
2836: p += addr;
2837: /* goto fail; */
2838: }
2839: else {
2840: STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2841: s = q;
2842: sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2843: }
2844: STAT_OP_OUT;
2845: continue;
2846: break;
2847:
2848: case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2849: STACK_POP_TIL_LOOK_BEHIND_NOT;
2850: goto fail;
2851: break;
2852:
2853: #ifdef USE_SUBEXP_CALL
2854: case OP_CALL: STAT_OP_IN(OP_CALL);
2855: GET_ABSADDR_INC(addr, p);
2856: STACK_PUSH_CALL_FRAME(p);
2857: p = reg->p + addr;
2858: STAT_OP_OUT;
2859: continue;
2860: break;
2861:
2862: case OP_RETURN: STAT_OP_IN(OP_RETURN);
2863: STACK_RETURN(p);
2864: STACK_PUSH_RETURN;
2865: STAT_OP_OUT;
2866: continue;
2867: break;
2868: #endif
2869:
2870: case OP_FINISH:
2871: goto finish;
2872: break;
2873:
2874: fail:
2875: STAT_OP_OUT;
2876: /* fall */
2877: case OP_FAIL: STAT_OP_IN(OP_FAIL);
2878: STACK_POP;
2879: p = stk->u.state.pcode;
2880: s = stk->u.state.pstr;
2881: sprev = stk->u.state.pstr_prev;
2882:
2883: #ifdef USE_COMBINATION_EXPLOSION_CHECK
2884: if (stk->u.state.state_check != 0) {
2885: stk->type = STK_STATE_CHECK_MARK;
2886: stk++;
2887: }
2888: #endif
2889:
2890: STAT_OP_OUT;
2891: continue;
2892: break;
2893:
2894: default:
2895: goto bytecode_error;
2896:
2897: } /* end of switch */
2898: sprev = sbegin;
2899: } /* end of while(1) */
2900:
2901: finish:
2902: STACK_SAVE;
2903: return best_len;
2904:
2905: #ifdef ONIG_DEBUG
2906: stack_error:
2907: STACK_SAVE;
2908: return ONIGERR_STACK_BUG;
2909: #endif
2910:
2911: bytecode_error:
2912: STACK_SAVE;
2913: return ONIGERR_UNDEFINED_BYTECODE;
2914:
2915: unexpected_bytecode_error:
2916: STACK_SAVE;
2917: return ONIGERR_UNEXPECTED_BYTECODE;
2918: }
2919:
2920:
2921: static UChar*
2922: slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2923: const UChar* text, const UChar* text_end, UChar* text_range)
2924: {
2925: UChar *t, *p, *s, *end;
2926:
2927: end = (UChar* )text_end;
2928: end -= target_end - target - 1;
2929: if (end > text_range)
2930: end = text_range;
2931:
2932: s = (UChar* )text;
2933:
2934: while (s < end) {
2935: if (*s == *target) {
2936: p = s + 1;
2937: t = target + 1;
2938: while (t < target_end) {
2939: if (*t != *p++)
2940: break;
2941: t++;
2942: }
2943: if (t == target_end)
2944: return s;
2945: }
2946: s += enc_len(enc, s);
2947: }
2948:
2949: return (UChar* )NULL;
2950: }
2951:
2952: static int
2953: str_lower_case_match(OnigEncoding enc, int ambig_flag,
2954: const UChar* t, const UChar* tend,
2955: const UChar* p, const UChar* end)
2956: {
2957: int lowlen;
2958: UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
2959: const UChar* tsave;
2960: const UChar* psave;
2961:
2962: tsave = t;
2963: psave = p;
2964:
2965: while (t < tend) {
2966: lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
2967: q = lowbuf;
2968: while (lowlen > 0) {
2969: if (*t++ != *q++) {
2970: return 0;
2971: }
2972: lowlen--;
2973: }
2974: }
2975:
2976: return 1;
2977: }
2978:
2979: static UChar*
2980: slow_search_ic(OnigEncoding enc, int ambig_flag,
2981: UChar* target, UChar* target_end,
2982: const UChar* text, const UChar* text_end, UChar* text_range)
2983: {
2984: UChar *s, *end;
2985:
2986: end = (UChar* )text_end;
2987: end -= target_end - target - 1;
2988: if (end > text_range)
2989: end = text_range;
2990:
2991: s = (UChar* )text;
2992:
2993: while (s < end) {
2994: if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
2995: return s;
2996:
2997: s += enc_len(enc, s);
2998: }
2999:
3000: return (UChar* )NULL;
3001: }
3002:
3003: static UChar*
3004: slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3005: const UChar* text, const UChar* adjust_text,
3006: const UChar* text_end, const UChar* text_start)
3007: {
3008: UChar *t, *p, *s;
3009:
3010: s = (UChar* )text_end;
3011: s -= (target_end - target);
3012: if (s > text_start)
3013: s = (UChar* )text_start;
3014: else
3015: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
3016:
3017: while (s >= text) {
3018: if (*s == *target) {
3019: p = s + 1;
3020: t = target + 1;
3021: while (t < target_end) {
3022: if (*t != *p++)
3023: break;
3024: t++;
3025: }
3026: if (t == target_end)
3027: return s;
3028: }
3029: s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
3030: }
3031:
3032: return (UChar* )NULL;
3033: }
3034:
3035: static UChar*
3036: slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
3037: UChar* target, UChar* target_end,
3038: const UChar* text, const UChar* adjust_text,
3039: const UChar* text_end, const UChar* text_start)
3040: {
3041: UChar *s;
3042:
3043: s = (UChar* )text_end;
3044: s -= (target_end - target);
3045: if (s > text_start)
3046: s = (UChar* )text_start;
3047: else
3048: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
3049:
3050: while (s >= text) {
3051: if (str_lower_case_match(enc, ambig_flag,
3052: target, target_end, s, text_end))
3053: return s;
3054:
3055: s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
3056: }
3057:
3058: return (UChar* )NULL;
3059: }
3060:
3061: static UChar*
3062: bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3063: const UChar* text, const UChar* text_end,
3064: const UChar* text_range)
3065: {
3066: const UChar *s, *se, *t, *p, *end;
3067: const UChar *tail;
3068: int skip, tlen1;
3069:
3070: #ifdef ONIG_DEBUG_SEARCH
3071: fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
3072: (int )text, (int )text_end, (int )text_range);
3073: #endif
3074:
3075: tail = target_end - 1;
3076: tlen1 = tail - target;
3077: end = text_range;
3078: if (end + tlen1 > text_end)
3079: end = text_end - tlen1;
3080:
3081: s = text;
3082:
3083: if (IS_NULL(reg->int_map)) {
3084: while (s < end) {
3085: p = se = s + tlen1;
3086: t = tail;
3087: while (t >= target && *p == *t) {
3088: p--; t--;
3089: }
3090: if (t < target) return (UChar* )s;
3091:
3092: skip = reg->map[*se];
3093: t = s;
3094: do {
3095: s += enc_len(reg->enc, s);
3096: } while ((s - t) < skip && s < end);
3097: }
3098: }
3099: else {
3100: while (s < end) {
3101: p = se = s + tlen1;
3102: t = tail;
3103: while (t >= target && *p == *t) {
3104: p--; t--;
3105: }
3106: if (t < target) return (UChar* )s;
3107:
3108: skip = reg->int_map[*se];
3109: t = s;
3110: do {
3111: s += enc_len(reg->enc, s);
3112: } while ((s - t) < skip && s < end);
3113: }
3114: }
3115:
3116: return (UChar* )NULL;
3117: }
3118:
3119: static UChar*
3120: bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3121: const UChar* text, const UChar* text_end, const UChar* text_range)
3122: {
3123: const UChar *s, *t, *p, *end;
3124: const UChar *tail;
3125:
3126: end = text_range + (target_end - target) - 1;
3127: if (end > text_end)
3128: end = text_end;
3129:
3130: tail = target_end - 1;
3131: s = text + (target_end - target) - 1;
3132: if (IS_NULL(reg->int_map)) {
3133: while (s < end) {
3134: p = s;
3135: t = tail;
3136: while (t >= target && *p == *t) {
3137: p--; t--;
3138: }
3139: if (t < target) return (UChar* )(p + 1);
3140: s += reg->map[*s];
3141: }
3142: }
3143: else { /* see int_map[] */
3144: while (s < end) {
3145: p = s;
3146: t = tail;
3147: while (t >= target && *p == *t) {
3148: p--; t--;
3149: }
3150: if (t < target) return (UChar* )(p + 1);
3151: s += reg->int_map[*s];
3152: }
3153: }
3154: return (UChar* )NULL;
3155: }
3156:
3157: static int
3158: set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
3159:
3160: {
3161: int i, len;
3162:
3163: if (IS_NULL(*skip)) {
3164: *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3165: if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3166: }
3167:
3168: len = end - s;
3169: for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3170: (*skip)[i] = len;
3171:
3172: for (i = len - 1; i > 0; i--)
3173: (*skip)[s[i]] = i;
3174:
3175: return 0;
3176: }
3177:
3178: static UChar*
3179: bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3180: const UChar* text, const UChar* adjust_text,
3181: const UChar* text_end, const UChar* text_start)
3182: {
3183: const UChar *s, *t, *p;
3184:
3185: s = text_end - (target_end - target);
3186: if (text_start < s)
3187: s = text_start;
3188: else
3189: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3190:
3191: while (s >= text) {
3192: p = s;
3193: t = target;
3194: while (t < target_end && *p == *t) {
3195: p++; t++;
3196: }
3197: if (t == target_end)
3198: return (UChar* )s;
3199:
3200: s -= reg->int_map_backward[*s];
3201: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3202: }
3203:
3204: return (UChar* )NULL;
3205: }
3206:
3207: static UChar*
3208: map_search(OnigEncoding enc, UChar map[],
3209: const UChar* text, const UChar* text_range)
3210: {
3211: const UChar *s = text;
3212:
3213: while (s < text_range) {
3214: if (map[*s]) return (UChar* )s;
3215:
3216: s += enc_len(enc, s);
3217: }
3218: return (UChar* )NULL;
3219: }
3220:
3221: static UChar*
3222: map_search_backward(OnigEncoding enc, UChar map[],
3223: const UChar* text, const UChar* adjust_text,
3224: const UChar* text_start)
3225: {
3226: const UChar *s = text_start;
3227:
3228: while (s >= text) {
3229: if (map[*s]) return (UChar* )s;
3230:
3231: s = onigenc_get_prev_char_head(enc, adjust_text, s);
3232: }
3233: return (UChar* )NULL;
3234: }
3235:
3236: extern int
3237: onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3238: OnigOptionType option)
3239: {
3240: int r;
3241: UChar *prev;
3242: MatchArg msa;
3243:
3244: #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3245: start:
3246: THREAD_ATOMIC_START;
3247: if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3248: ONIG_STATE_INC(reg);
3249: if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3250: onig_chain_reduce(reg);
3251: ONIG_STATE_INC(reg);
3252: }
3253: }
3254: else {
3255: int n;
3256:
3257: THREAD_ATOMIC_END;
3258: n = 0;
3259: while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3260: if (++n > THREAD_PASS_LIMIT_COUNT)
3261: return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3262: THREAD_PASS;
3263: }
3264: goto start;
3265: }
3266: THREAD_ATOMIC_END;
3267: #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3268:
3269: MATCH_ARG_INIT(msa, option, region, at);
3270: #ifdef USE_COMBINATION_EXPLOSION_CHECK
3271: {
3272: int offset = at - str;
3273: STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3274: }
3275: #endif
3276:
3277: if (region
3278: #ifdef USE_POSIX_REGION_OPTION
3279: && !IS_POSIX_REGION(option)
3280: #endif
3281: ) {
3282: r = onig_region_resize_clear(region, reg->num_mem + 1);
3283: }
3284: else
3285: r = 0;
3286:
3287: if (r == 0) {
3288: prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3289: r = match_at(reg, str, end, at, prev, &msa);
3290: }
3291:
3292: MATCH_ARG_FREE(msa);
3293: ONIG_STATE_DEC_THREAD(reg);
3294: return r;
3295: }
3296:
3297: static int
3298: forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3299: UChar* range, UChar** low, UChar** high, UChar** low_prev)
3300: {
3301: UChar *p, *pprev = (UChar* )NULL;
3302:
3303: #ifdef ONIG_DEBUG_SEARCH
3304: fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3305: (int )str, (int )end, (int )s, (int )range);
3306: #endif
3307:
3308: p = s;
3309: if (reg->dmin > 0) {
3310: if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3311: p += reg->dmin;
3312: }
3313: else {
3314: UChar *q = p + reg->dmin;
3315: while (p < q) p += enc_len(reg->enc, p);
3316: }
3317: }
3318:
3319: retry:
3320: switch (reg->optimize) {
3321: case ONIG_OPTIMIZE_EXACT:
3322: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3323: break;
3324: case ONIG_OPTIMIZE_EXACT_IC:
3325: p = slow_search_ic(reg->enc, reg->ambig_flag,
3326: reg->exact, reg->exact_end, p, end, range);
3327: break;
3328:
3329: case ONIG_OPTIMIZE_EXACT_BM:
3330: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3331: break;
3332:
3333: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3334: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3335: break;
3336:
3337: case ONIG_OPTIMIZE_MAP:
3338: p = map_search(reg->enc, reg->map, p, range);
3339: break;
3340: }
3341:
3342: if (p && p < range) {
3343: if (p - reg->dmin < s) {
3344: retry_gate:
3345: pprev = p;
3346: p += enc_len(reg->enc, p);
3347: goto retry;
3348: }
3349:
3350: if (reg->sub_anchor) {
3351: UChar* prev;
3352:
3353: switch (reg->sub_anchor) {
3354: case ANCHOR_BEGIN_LINE:
3355: if (!ON_STR_BEGIN(p)) {
3356: prev = onigenc_get_prev_char_head(reg->enc,
3357: (pprev ? pprev : str), p);
3358: if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3359: goto retry_gate;
3360: }
3361: break;
3362:
3363: case ANCHOR_END_LINE:
3364: if (ON_STR_END(p)) {
3365: prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3366: (pprev ? pprev : str), p);
3367: if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3368: goto retry_gate;
3369: }
3370: else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3371: #ifdef USE_CRNL_AS_LINE_TERMINATOR
3372: && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3373: #endif
3374: )
3375: goto retry_gate;
3376: break;
3377: }
3378: }
3379:
3380: if (reg->dmax == 0) {
3381: *low = p;
3382: if (low_prev) {
3383: if (*low > s)
3384: *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3385: else
3386: *low_prev = onigenc_get_prev_char_head(reg->enc,
3387: (pprev ? pprev : str), p);
3388: }
3389: }
3390: else {
3391: if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3392: *low = p - reg->dmax;
3393: if (*low > s) {
3394: *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3395: *low, (const UChar** )low_prev);
3396: if (low_prev && IS_NULL(*low_prev))
3397: *low_prev = onigenc_get_prev_char_head(reg->enc,
3398: (pprev ? pprev : s), *low);
3399: }
3400: else {
3401: if (low_prev)
3402: *low_prev = onigenc_get_prev_char_head(reg->enc,
3403: (pprev ? pprev : str), *low);
3404: }
3405: }
3406: }
3407: /* no needs to adjust *high, *high is used as range check only */
3408: *high = p - reg->dmin;
3409:
3410: #ifdef ONIG_DEBUG_SEARCH
3411: fprintf(stderr,
3412: "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3413: (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3414: #endif
3415: return 1; /* success */
3416: }
3417:
3418: return 0; /* fail */
3419: }
3420:
3421: static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3422: int** skip));
3423:
3424: #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3425:
3426: static int
3427: backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3428: UChar* s, const UChar* range, UChar* adjrange,
3429: UChar** low, UChar** high)
3430: {
3431: int r;
3432: UChar *p;
3433:
3434: range += reg->dmin;
3435: p = s;
3436:
3437: retry:
3438: switch (reg->optimize) {
3439: case ONIG_OPTIMIZE_EXACT:
3440: exact_method:
3441: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3442: range, adjrange, end, p);
3443: break;
3444:
3445: case ONIG_OPTIMIZE_EXACT_IC:
3446: p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
3447: reg->exact, reg->exact_end,
3448: range, adjrange, end, p);
3449: break;
3450:
3451: case ONIG_OPTIMIZE_EXACT_BM:
3452: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3453: if (IS_NULL(reg->int_map_backward)) {
3454: if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3455: goto exact_method;
3456:
3457: r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3458: &(reg->int_map_backward));
3459: if (r) return r;
3460: }
3461: p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3462: end, p);
3463: break;
3464:
3465: case ONIG_OPTIMIZE_MAP:
3466: p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3467: break;
3468: }
3469:
3470: if (p) {
3471: if (reg->sub_anchor) {
3472: UChar* prev;
3473:
3474: switch (reg->sub_anchor) {
3475: case ANCHOR_BEGIN_LINE:
3476: if (!ON_STR_BEGIN(p)) {
3477: prev = onigenc_get_prev_char_head(reg->enc, str, p);
3478: if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3479: p = prev;
3480: goto retry;
3481: }
3482: }
3483: break;
3484:
3485: case ANCHOR_END_LINE:
3486: if (ON_STR_END(p)) {
3487: prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3488: if (IS_NULL(prev)) goto fail;
3489: if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3490: p = prev;
3491: goto retry;
3492: }
3493: }
3494: else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3495: #ifdef USE_CRNL_AS_LINE_TERMINATOR
3496: && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3497: #endif
3498: ) {
3499: p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3500: if (IS_NULL(p)) goto fail;
3501: goto retry;
3502: }
3503: break;
3504: }
3505: }
3506:
3507: /* no needs to adjust *high, *high is used as range check only */
3508: if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3509: *low = p - reg->dmax;
3510: *high = p - reg->dmin;
3511: *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3512: }
3513:
3514: #ifdef ONIG_DEBUG_SEARCH
3515: fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3516: (int )(*low - str), (int )(*high - str));
3517: #endif
3518: return 1; /* success */
3519: }
3520:
3521: fail:
3522: #ifdef ONIG_DEBUG_SEARCH
3523: fprintf(stderr, "backward_search_range: fail.\n");
3524: #endif
3525: return 0; /* fail */
3526: }
3527:
3528:
3529: extern int
3530: onig_search(regex_t* reg, const UChar* str, const UChar* end,
3531: const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3532: {
3533: int r;
3534: UChar *s, *prev;
3535: MatchArg msa;
3536: const UChar *orig_start = start;
3537:
3538: #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3539: start:
3540: THREAD_ATOMIC_START;
3541: if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3542: ONIG_STATE_INC(reg);
3543: if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3544: onig_chain_reduce(reg);
3545: ONIG_STATE_INC(reg);
3546: }
3547: }
3548: else {
3549: int n;
3550:
3551: THREAD_ATOMIC_END;
3552: n = 0;
3553: while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3554: if (++n > THREAD_PASS_LIMIT_COUNT)
3555: return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3556: THREAD_PASS;
3557: }
3558: goto start;
3559: }
3560: THREAD_ATOMIC_END;
3561: #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3562:
3563: #ifdef ONIG_DEBUG_SEARCH
3564: fprintf(stderr,
3565: "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3566: (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3567: #endif
3568:
3569: if (region
3570: #ifdef USE_POSIX_REGION_OPTION
3571: && !IS_POSIX_REGION(option)
3572: #endif
3573: ) {
3574: r = onig_region_resize_clear(region, reg->num_mem + 1);
3575: if (r) goto finish_no_msa;
3576: }
3577:
3578: if (start > end || start < str) goto mismatch_no_msa;
3579:
3580: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3581: #define MATCH_AND_RETURN_CHECK \
3582: r = match_at(reg, str, end, s, prev, &msa);\
3583: if (r != ONIG_MISMATCH) {\
3584: if (r >= 0) {\
3585: if (! IS_FIND_LONGEST(reg->options)) {\
3586: goto match;\
3587: }\
3588: }\
3589: else goto finish; /* error */ \
3590: }
3591: #else
3592: #define MATCH_AND_RETURN_CHECK \
3593: r = match_at(reg, str, end, s, prev, &msa);\
3594: if (r != ONIG_MISMATCH) {\
3595: if (r >= 0) {\
3596: goto match;\
3597: }\
3598: else goto finish; /* error */ \
3599: }
3600: #endif
3601:
3602: /* anchor optimize: resume search range */
3603: if (reg->anchor != 0 && str < end) {
3604: UChar *min_semi_end, *max_semi_end;
3605:
3606: if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3607: /* search start-position only */
3608: begin_position:
3609: if (range > start)
3610: range = start + 1;
3611: else
3612: range = start;
3613: }
3614: else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3615: /* search str-position only */
3616: if (range > start) {
3617: if (start != str) goto mismatch_no_msa;
3618: range = str + 1;
3619: }
3620: else {
3621: if (range <= str) {
3622: start = str;
3623: range = str;
3624: }
3625: else
3626: goto mismatch_no_msa;
3627: }
3628: }
3629: else if (reg->anchor & ANCHOR_END_BUF) {
3630: min_semi_end = max_semi_end = (UChar* )end;
3631:
3632: end_buf:
3633: if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3634: goto mismatch_no_msa;
3635:
3636: if (range > start) {
3637: if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3638: start = min_semi_end - reg->anchor_dmax;
3639: if (start < end)
3640: start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3641: else { /* match with empty at end */
3642: start = onigenc_get_prev_char_head(reg->enc, str, end);
3643: }
3644: }
3645: if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3646: range = max_semi_end - reg->anchor_dmin + 1;
3647: }
3648:
3649: if (start >= range) goto mismatch_no_msa;
3650: }
3651: else {
3652: if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3653: range = min_semi_end - reg->anchor_dmax;
3654: }
3655: if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3656: start = max_semi_end - reg->anchor_dmin;
3657: start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3658: }
3659: if (range > start) goto mismatch_no_msa;
3660: }
3661: }
3662: else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3663: UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3664:
3665: max_semi_end = (UChar* )end;
3666: if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3667: min_semi_end = pre_end;
3668:
3669: #ifdef USE_CRNL_AS_LINE_TERMINATOR
3670: pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3671: if (IS_NOT_NULL(pre_end) &&
3672: ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3673: min_semi_end = pre_end;
3674: }
3675: #endif
3676: if (min_semi_end > str && start <= min_semi_end) {
3677: goto end_buf;
3678: }
3679: }
3680: else {
3681: min_semi_end = (UChar* )end;
3682: goto end_buf;
3683: }
3684: }
3685: else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3686: goto begin_position;
3687: }
3688: }
3689: else if (str == end) { /* empty string */
3690: static const UChar* address_for_empty_string = (UChar* )"";
3691:
3692: #ifdef ONIG_DEBUG_SEARCH
3693: fprintf(stderr, "onig_search: empty string.\n");
3694: #endif
3695:
3696: if (reg->threshold_len == 0) {
3697: start = end = str = address_for_empty_string;
3698: s = (UChar* )start;
3699: prev = (UChar* )NULL;
3700:
3701: MATCH_ARG_INIT(msa, option, region, start);
3702: #ifdef USE_COMBINATION_EXPLOSION_CHECK
3703: msa.state_check_buff = (void* )0;
3704: msa.state_check_buff_size = 0;
3705: #endif
3706: MATCH_AND_RETURN_CHECK;
3707: goto mismatch;
3708: }
3709: goto mismatch_no_msa;
3710: }
3711:
3712: #ifdef ONIG_DEBUG_SEARCH
3713: fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3714: (int )(end - str), (int )(start - str), (int )(range - str));
3715: #endif
3716:
3717: MATCH_ARG_INIT(msa, option, region, orig_start);
3718: #ifdef USE_COMBINATION_EXPLOSION_CHECK
3719: {
3720: int offset = (MIN(start, range) - str);
3721: STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3722: }
3723: #endif
3724:
3725: s = (UChar* )start;
3726: if (range > start) { /* forward search */
3727: if (s > str)
3728: prev = onigenc_get_prev_char_head(reg->enc, str, s);
3729: else
3730: prev = (UChar* )NULL;
3731:
3732: if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3733: UChar *sch_range, *low, *high, *low_prev;
3734:
3735: sch_range = (UChar* )range;
3736: if (reg->dmax != 0) {
3737: if (reg->dmax == ONIG_INFINITE_DISTANCE)
3738: sch_range = (UChar* )end;
3739: else {
3740: sch_range += reg->dmax;
3741: if (sch_range > end) sch_range = (UChar* )end;
3742: }
3743: }
3744:
3745: if ((end - start) < reg->threshold_len)
3746: goto mismatch;
3747:
3748: if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3749: do {
3750: if (! forward_search_range(reg, str, end, s, sch_range,
3751: &low, &high, &low_prev)) goto mismatch;
3752: if (s < low) {
3753: s = low;
3754: prev = low_prev;
3755: }
3756: while (s <= high) {
3757: MATCH_AND_RETURN_CHECK;
3758: prev = s;
3759: s += enc_len(reg->enc, s);
3760: }
3761: } while (s < range);
3762: goto mismatch;
3763: }
3764: else { /* check only. */
3765: if (! forward_search_range(reg, str, end, s, sch_range,
3766: &low, &high, (UChar** )NULL)) goto mismatch;
3767:
3768: if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3769: do {
3770: MATCH_AND_RETURN_CHECK;
3771: prev = s;
3772: s += enc_len(reg->enc, s);
3773:
3774: while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3775: prev = s;
3776: s += enc_len(reg->enc, s);
3777: }
3778: } while (s < range);
3779: goto mismatch;
3780: }
3781: }
3782: }
3783:
3784: do {
3785: MATCH_AND_RETURN_CHECK;
3786: prev = s;
3787: s += enc_len(reg->enc, s);
3788: } while (s < range);
3789:
3790: if (s == range) { /* because empty match with /$/. */
3791: MATCH_AND_RETURN_CHECK;
3792: }
3793: }
3794: else { /* backward search */
3795: if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3796: UChar *low, *high, *adjrange, *sch_start;
3797:
3798: if (range < end)
3799: adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3800: else
3801: adjrange = (UChar* )end;
3802:
3803: if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3804: (end - range) >= reg->threshold_len) {
3805: do {
3806: sch_start = s + reg->dmax;
3807: if (sch_start > end) sch_start = (UChar* )end;
3808: if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3809: &low, &high) <= 0)
3810: goto mismatch;
3811:
3812: if (s > high)
3813: s = high;
3814:
3815: while (s >= low) {
3816: prev = onigenc_get_prev_char_head(reg->enc, str, s);
3817: MATCH_AND_RETURN_CHECK;
3818: s = prev;
3819: }
3820: } while (s >= range);
3821: goto mismatch;
3822: }
3823: else { /* check only. */
3824: if ((end - range) < reg->threshold_len) goto mismatch;
3825:
3826: sch_start = s;
3827: if (reg->dmax != 0) {
3828: if (reg->dmax == ONIG_INFINITE_DISTANCE)
3829: sch_start = (UChar* )end;
3830: else {
3831: sch_start += reg->dmax;
3832: if (sch_start > end) sch_start = (UChar* )end;
3833: else
3834: sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3835: start, sch_start);
3836: }
3837: }
3838: if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3839: &low, &high) <= 0) goto mismatch;
3840: }
3841: }
3842:
3843: do {
3844: prev = onigenc_get_prev_char_head(reg->enc, str, s);
3845: MATCH_AND_RETURN_CHECK;
3846: s = prev;
3847: } while (s >= range);
3848: }
3849:
3850: mismatch:
3851: #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3852: if (IS_FIND_LONGEST(reg->options)) {
3853: if (msa.best_len >= 0) {
3854: s = msa.best_s;
3855: goto match;
3856: }
3857: }
3858: #endif
3859: r = ONIG_MISMATCH;
3860:
3861: finish:
3862: MATCH_ARG_FREE(msa);
3863: ONIG_STATE_DEC_THREAD(reg);
3864:
3865: /* If result is mismatch and no FIND_NOT_EMPTY option,
3866: then the region is not setted in match_at(). */
3867: if (IS_FIND_NOT_EMPTY(reg->options) && region
3868: #ifdef USE_POSIX_REGION_OPTION
3869: && !IS_POSIX_REGION(option)
3870: #endif
3871: ) {
3872: onig_region_clear(region);
3873: }
3874:
3875: #ifdef ONIG_DEBUG
3876: if (r != ONIG_MISMATCH)
3877: fprintf(stderr, "onig_search: error %d\n", r);
3878: #endif
3879: return r;
3880:
3881: mismatch_no_msa:
3882: r = ONIG_MISMATCH;
3883: finish_no_msa:
3884: ONIG_STATE_DEC_THREAD(reg);
3885: #ifdef ONIG_DEBUG
3886: if (r != ONIG_MISMATCH)
3887: fprintf(stderr, "onig_search: error %d\n", r);
3888: #endif
3889: return r;
3890:
3891: match:
3892: ONIG_STATE_DEC_THREAD(reg);
3893: MATCH_ARG_FREE(msa);
3894: return s - str;
3895: }
3896:
3897: extern OnigEncoding
3898: onig_get_encoding(regex_t* reg)
3899: {
3900: return reg->enc;
3901: }
3902:
3903: extern OnigOptionType
3904: onig_get_options(regex_t* reg)
3905: {
3906: return reg->options;
3907: }
3908:
3909: extern OnigAmbigType
3910: onig_get_ambig_flag(regex_t* reg)
3911: {
3912: return reg->ambig_flag;
3913: }
3914:
3915: extern OnigSyntaxType*
3916: onig_get_syntax(regex_t* reg)
3917: {
3918: return reg->syntax;
3919: }
3920:
3921: extern int
3922: onig_number_of_captures(regex_t* reg)
3923: {
3924: return reg->num_mem;
3925: }
3926:
3927: extern int
3928: onig_number_of_capture_histories(regex_t* reg)
3929: {
3930: #ifdef USE_CAPTURE_HISTORY
3931: int i, n;
3932:
3933: n = 0;
3934: for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3935: if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3936: n++;
3937: }
3938: return n;
3939: #else
3940: return 0;
3941: #endif
3942: }
3943:
3944: extern void
3945: onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3946: {
3947: *to = *from;
3948: }
3949:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>