1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2013 University of Cambridge
10:
11: The machine code generator part (this module) was written by Zoltan Herczeg
12: Copyright (c) 2010-2013
13:
14: -----------------------------------------------------------------------------
15: Redistribution and use in source and binary forms, with or without
16: modification, are permitted provided that the following conditions are met:
17:
18: * Redistributions of source code must retain the above copyright notice,
19: this list of conditions and the following disclaimer.
20:
21: * Redistributions in binary form must reproduce the above copyright
22: notice, this list of conditions and the following disclaimer in the
23: documentation and/or other materials provided with the distribution.
24:
25: * Neither the name of the University of Cambridge nor the names of its
26: contributors may be used to endorse or promote products derived from
27: this software without specific prior written permission.
28:
29: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39: POSSIBILITY OF SUCH DAMAGE.
40: -----------------------------------------------------------------------------
41: */
42:
43: #ifdef HAVE_CONFIG_H
44: #include "config.h"
45: #endif
46:
47: #include "pcre_internal.h"
48:
49: #if defined SUPPORT_JIT
50:
51: /* All-in-one: Since we use the JIT compiler only from here,
52: we just include it. This way we don't need to touch the build
53: system files. */
54:
55: #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56: #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57: #define SLJIT_CONFIG_AUTO 1
58: #define SLJIT_CONFIG_STATIC 1
59: #define SLJIT_VERBOSE 0
60: #define SLJIT_DEBUG 0
61:
62: #include "sljit/sljitLir.c"
63:
64: #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65: #error Unsupported architecture
66: #endif
67:
/* Debugging switches. Both are left commented out. */

/* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
   1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* DEBUG_FORCE_CONTROL_HEAD:
   1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Memory for the regex stack is allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* With sljit debugging switched on, enable the check that an allocation
   could destroy temporaries. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89:
90: /*
Short summary about the backtracking mechanism employed by the jit code generator:
92:
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98:
99: 'ab' - 'a' and 'b' regexps are concatenated
100: 'a+' - 'a' is the sub-expression of the '+' operator
101:
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107:
108: Greedy star operator (*) :
109: Matching path: match happens.
110: Backtrack path: match failed.
111: Non-greedy star operator (*?) :
112: Matching path: no need to perform a match.
113: Backtrack path: match is required.
114:
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
118:
119: A(B|C)D
120:
121: The generated code will be the following:
122:
123: A matching path
124: '(' matching path (pushing arguments to the stack)
125: B matching path
126: ')' matching path (pushing arguments to the stack)
127: D matching path
128: return with successful match
129:
130: D backtrack path
131: ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132: B backtrack path
133: C expected path
134: jump to D matching path
135: C backtrack path
136: A backtrack path
137:
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144: */
145:
146: /*
147: Saved stack frames:
148:
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153:
154: The stack frames are stored in a chain list, and have the following format:
155: ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156:
157: Thus we can restore the private data to a particular point in the stack.
158: */
159:
160: typedef struct jit_arguments {
161: /* Pointers first. */
162: struct sljit_stack *stack;
163: const pcre_uchar *str;
164: const pcre_uchar *begin;
165: const pcre_uchar *end;
166: int *offsets;
167: pcre_uchar *uchar_ptr;
168: pcre_uchar *mark_ptr;
169: void *callout_data;
170: /* Everything else after. */
171: pcre_uint32 limit_match;
172: int real_offset_count;
173: int offset_count;
174: pcre_uint8 notbol;
175: pcre_uint8 noteol;
176: pcre_uint8 notempty;
177: pcre_uint8 notempty_atstart;
178: } jit_arguments;
179:
180: typedef struct executable_functions {
181: void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182: PUBL(jit_callback) callback;
183: void *userdata;
184: pcre_uint32 top_bracket;
185: pcre_uint32 limit_match;
186: sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187: } executable_functions;
188:
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node. */
} jump_list;
193:
/* Node of a singly linked list of stubs. Each stub pairs a jump (start)
   with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;    /* Following node. */
} stub_list;
199:
/* Negative marker values for frame sizes. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
204:
/* Kinds of control entries. The numeric values are fixed explicitly. */
enum control_types {
  type_mark      = 0,
  type_then_trap = 1
};
209:
210: typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211:
212: /* The following structure is the key data type for the recursive
213: code generator. It is allocated by compile_matchingpath, and contains
214: the arguments for compile_backtrackingpath. Must be the first member
215: of its descendants. */
216: typedef struct backtrack_common {
217: /* Concatenation stack. */
218: struct backtrack_common *prev;
219: jump_list *nextbacktracks;
220: /* Internal stack (for component operators). */
221: struct backtrack_common *top;
222: jump_list *topbacktracks;
223: /* Opcode pointer. */
224: pcre_uchar *cc;
225: } backtrack_common;
226:
227: typedef struct assert_backtrack {
228: backtrack_common common;
229: jump_list *condfailed;
230: /* Less than 0 if a frame is not needed. */
231: int framesize;
232: /* Points to our private memory word on the stack. */
233: int private_data_ptr;
234: /* For iterators. */
235: struct sljit_label *matchingpath;
236: } assert_backtrack;
237:
238: typedef struct bracket_backtrack {
239: backtrack_common common;
240: /* Where to coninue if an alternative is successfully matched. */
241: struct sljit_label *alternative_matchingpath;
242: /* For rmin and rmax iterators. */
243: struct sljit_label *recursive_matchingpath;
244: /* For greedy ? operator. */
245: struct sljit_label *zero_matchingpath;
246: /* Contains the branches of a failed condition. */
247: union {
248: /* Both for OP_COND, OP_SCOND. */
249: jump_list *condfailed;
250: assert_backtrack *assert;
251: /* For OP_ONCE. Less than 0 if not needed. */
252: int framesize;
253: } u;
254: /* Points to our private memory word on the stack. */
255: int private_data_ptr;
256: } bracket_backtrack;
257:
258: typedef struct bracketpos_backtrack {
259: backtrack_common common;
260: /* Points to our private memory word on the stack. */
261: int private_data_ptr;
262: /* Reverting stack is needed. */
263: int framesize;
264: /* Allocated stack size. */
265: int stacksize;
266: } bracketpos_backtrack;
267:
268: typedef struct braminzero_backtrack {
269: backtrack_common common;
270: struct sljit_label *matchingpath;
271: } braminzero_backtrack;
272:
273: typedef struct iterator_backtrack {
274: backtrack_common common;
275: /* Next iteration. */
276: struct sljit_label *matchingpath;
277: } iterator_backtrack;
278:
279: typedef struct recurse_entry {
280: struct recurse_entry *next;
281: /* Contains the function entry. */
282: struct sljit_label *entry;
283: /* Collects the calls until the function is not created. */
284: jump_list *calls;
285: /* Points to the starting opcode. */
286: sljit_sw start;
287: } recurse_entry;
288:
289: typedef struct recurse_backtrack {
290: backtrack_common common;
291: BOOL inlined_pattern;
292: } recurse_backtrack;
293:
294: #define OP_THEN_TRAP OP_TABLE_LENGTH
295:
296: typedef struct then_trap_backtrack {
297: backtrack_common common;
298: /* If then_trap is not NULL, this structure contains the real
299: then_trap for the backtracking path. */
300: struct then_trap_backtrack *then_trap;
301: /* Points to the starting opcode. */
302: sljit_sw start;
303: /* Exit point for the then opcodes of this alternative. */
304: jump_list *quit;
305: /* Frame size of the current alternative. */
306: int framesize;
307: } then_trap_backtrack;
308:
309: #define MAX_RANGE_SIZE 6
310:
311: typedef struct compiler_common {
312: /* The sljit ceneric compiler. */
313: struct sljit_compiler *compiler;
314: /* First byte code. */
315: pcre_uchar *start;
316: /* Maps private data offset to each opcode. */
317: sljit_si *private_data_ptrs;
318: /* Tells whether the capturing bracket is optimized. */
319: pcre_uint8 *optimized_cbracket;
320: /* Tells whether the starting offset is a target of then. */
321: pcre_uint8 *then_offsets;
322: /* Current position where a THEN must jump. */
323: then_trap_backtrack *then_trap;
324: /* Starting offset of private data for capturing brackets. */
325: int cbra_ptr;
326: /* Output vector starting point. Must be divisible by 2. */
327: int ovector_start;
328: /* Last known position of the requested byte. */
329: int req_char_ptr;
330: /* Head of the last recursion. */
331: int recursive_head_ptr;
332: /* First inspected character for partial matching. */
333: int start_used_ptr;
334: /* Starting pointer for partial soft matches. */
335: int hit_start;
336: /* End pointer of the first line. */
337: int first_line_end;
338: /* Points to the marked string. */
339: int mark_ptr;
340: /* Recursive control verb management chain. */
341: int control_head_ptr;
342: /* Points to the last matched capture block index. */
343: int capture_last_ptr;
344: /* Points to the starting position of the current match. */
345: int start_ptr;
346:
347: /* Flipped and lower case tables. */
348: const pcre_uint8 *fcc;
349: sljit_sw lcc;
350: /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351: int mode;
352: /* \K is found in the pattern. */
353: BOOL has_set_som;
354: /* (*SKIP:arg) is found in the pattern. */
355: BOOL has_skip_arg;
356: /* (*THEN) is found in the pattern. */
357: BOOL has_then;
358: /* Needs to know the start position anytime. */
359: BOOL needs_start_ptr;
360: /* Currently in recurse or negative assert. */
361: BOOL local_exit;
362: /* Currently in a positive assert. */
363: BOOL positive_assert;
364: /* Newline control. */
365: int nltype;
366: int newline;
367: int bsr_nltype;
368: /* Dollar endonly. */
369: int endonly;
370: /* Tables. */
371: sljit_sw ctypes;
372: int digits[2 + MAX_RANGE_SIZE];
373: /* Named capturing brackets. */
374: sljit_uw name_table;
375: sljit_sw name_count;
376: sljit_sw name_entry_size;
377:
378: /* Labels and jump lists. */
379: struct sljit_label *partialmatchlabel;
380: struct sljit_label *quit_label;
381: struct sljit_label *forced_quit_label;
382: struct sljit_label *accept_label;
383: stub_list *stubs;
384: recurse_entry *entries;
385: recurse_entry *currententry;
386: jump_list *partialmatch;
387: jump_list *quit;
388: jump_list *positive_assert_quit;
389: jump_list *forced_quit;
390: jump_list *accept;
391: jump_list *calllimit;
392: jump_list *stackalloc;
393: jump_list *revertframes;
394: jump_list *wordboundary;
395: jump_list *anynewline;
396: jump_list *hspace;
397: jump_list *vspace;
398: jump_list *casefulcmp;
399: jump_list *caselesscmp;
400: jump_list *reset_match;
401: BOOL jscript_compat;
402: #ifdef SUPPORT_UTF
403: BOOL utf;
404: #ifdef SUPPORT_UCP
405: BOOL use_ucp;
406: #endif
407: #ifndef COMPILE_PCRE32
408: jump_list *utfreadchar;
409: #endif
410: #ifdef COMPILE_PCRE8
411: jump_list *utfreadtype8;
412: #endif
413: #endif /* SUPPORT_UTF */
414: #ifdef SUPPORT_UCP
415: jump_list *getucd;
416: #endif
417: } compiler_common;
418:
/* For byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
  int ucharptr;
  /* c and oc are two unions with the same layout: each can be viewed as an
     int, as an unsigned short, or as an array of code units whose element
     type and count depend on the character width being compiled. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined(COMPILE_PCRE8)
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined(COMPILE_PCRE16)
    sljit_uh asuchars[2];
#elif defined(COMPILE_PCRE32)
    sljit_ui asuchars[1];
#endif
  } c, oc;
#endif
} compare_context;
452:
/* Drop the CMP macro that came in with sljit. This file defines its own
   CMP among the shortcut macros further down. */
#undef CMP

/* Byte offset of the i-th element of the stack. Offsets are negative:
   STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw), ... */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458:
/* Symbolic names for the sljit registers used by this file. Note that the
   scratch registers are not assigned in numeric order: TMP2 is scratch
   register 3 and STACK_TOP is scratch register 2. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define COUNT_MATCH   SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
469:
/* Local space layout. All values are byte offsets expressed in units of
   sizeof(sljit_sw). */

/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))

/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))

/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))

/* The output vector is stored on the stack, and contains pointers
   to characters. The vector data is divided into two groups: the first
   group contains the start / end character pointers, and the second is
   the start pointers when the end of the capturing group has not yet
   been reached.
   These macros, like PRIVATE_DATA, expect a variable named `common` to be
   in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))

/* Private data entry of the opcode at cc, indexed by its distance from
   common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487:
488: #if defined COMPILE_PCRE8
489: #define MOV_UCHAR SLJIT_MOV_UB
490: #define MOVU_UCHAR SLJIT_MOVU_UB
491: #elif defined COMPILE_PCRE16
492: #define MOV_UCHAR SLJIT_MOV_UH
493: #define MOVU_UCHAR SLJIT_MOVU_UH
494: #elif defined COMPILE_PCRE32
495: #define MOV_UCHAR SLJIT_MOV_UI
496: #define MOVU_UCHAR SLJIT_MOVU_UI
497: #else
498: #error Unsupported compiling mode
499: #endif
500:
/* Shortcuts for the sljit emitter calls. Every macro below except SET_LABEL
   expects a local variable named `compiler`; DEFINE_COMPILER declares it
   from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. JUMPTO emits a jump bound to an existing label,
   JUMPHERE binds an existing jump to a label emitted at the current
   position. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))

/* Compare-and-jump. CMPTO additionally binds the emitted jump to label. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
526:
527: static pcre_uchar* bracketend(pcre_uchar* cc)
528: {
529: SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530: do cc += GET(cc, 1); while (*cc == OP_ALT);
531: SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532: cc += 1 + LINK_SIZE;
533: return cc;
534: }
535:
/* Functions which might need modification whenever a new opcode is supported:
537: next_opcode
538: check_opcode_types
539: set_private_data_ptrs
540: get_framesize
541: init_frame
542: get_private_data_copy_length
543: copy_private_data
544: compile_matchingpath
545: compile_backtrackingpath
546: */
547:
548: static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
549: {
550: SLJIT_UNUSED_ARG(common);
551: switch(*cc)
552: {
553: case OP_SOD:
554: case OP_SOM:
555: case OP_SET_SOM:
556: case OP_NOT_WORD_BOUNDARY:
557: case OP_WORD_BOUNDARY:
558: case OP_NOT_DIGIT:
559: case OP_DIGIT:
560: case OP_NOT_WHITESPACE:
561: case OP_WHITESPACE:
562: case OP_NOT_WORDCHAR:
563: case OP_WORDCHAR:
564: case OP_ANY:
565: case OP_ALLANY:
566: case OP_NOTPROP:
567: case OP_PROP:
568: case OP_ANYNL:
569: case OP_NOT_HSPACE:
570: case OP_HSPACE:
571: case OP_NOT_VSPACE:
572: case OP_VSPACE:
573: case OP_EXTUNI:
574: case OP_EODN:
575: case OP_EOD:
576: case OP_CIRC:
577: case OP_CIRCM:
578: case OP_DOLL:
579: case OP_DOLLM:
580: case OP_CRSTAR:
581: case OP_CRMINSTAR:
582: case OP_CRPLUS:
583: case OP_CRMINPLUS:
584: case OP_CRQUERY:
585: case OP_CRMINQUERY:
586: case OP_CRRANGE:
587: case OP_CRMINRANGE:
588: case OP_CLASS:
589: case OP_NCLASS:
590: case OP_REF:
591: case OP_REFI:
592: case OP_RECURSE:
593: case OP_CALLOUT:
594: case OP_ALT:
595: case OP_KET:
596: case OP_KETRMAX:
597: case OP_KETRMIN:
598: case OP_KETRPOS:
599: case OP_REVERSE:
600: case OP_ASSERT:
601: case OP_ASSERT_NOT:
602: case OP_ASSERTBACK:
603: case OP_ASSERTBACK_NOT:
604: case OP_ONCE:
605: case OP_ONCE_NC:
606: case OP_BRA:
607: case OP_BRAPOS:
608: case OP_CBRA:
609: case OP_CBRAPOS:
610: case OP_COND:
611: case OP_SBRA:
612: case OP_SBRAPOS:
613: case OP_SCBRA:
614: case OP_SCBRAPOS:
615: case OP_SCOND:
616: case OP_CREF:
617: case OP_NCREF:
618: case OP_RREF:
619: case OP_NRREF:
620: case OP_DEF:
621: case OP_BRAZERO:
622: case OP_BRAMINZERO:
623: case OP_BRAPOSZERO:
624: case OP_PRUNE:
625: case OP_SKIP:
626: case OP_THEN:
627: case OP_COMMIT:
628: case OP_FAIL:
629: case OP_ACCEPT:
630: case OP_ASSERT_ACCEPT:
631: case OP_CLOSE:
632: case OP_SKIPZERO:
633: return cc + PRIV(OP_lengths)[*cc];
634:
635: case OP_CHAR:
636: case OP_CHARI:
637: case OP_NOT:
638: case OP_NOTI:
639: case OP_STAR:
640: case OP_MINSTAR:
641: case OP_PLUS:
642: case OP_MINPLUS:
643: case OP_QUERY:
644: case OP_MINQUERY:
645: case OP_UPTO:
646: case OP_MINUPTO:
647: case OP_EXACT:
648: case OP_POSSTAR:
649: case OP_POSPLUS:
650: case OP_POSQUERY:
651: case OP_POSUPTO:
652: case OP_STARI:
653: case OP_MINSTARI:
654: case OP_PLUSI:
655: case OP_MINPLUSI:
656: case OP_QUERYI:
657: case OP_MINQUERYI:
658: case OP_UPTOI:
659: case OP_MINUPTOI:
660: case OP_EXACTI:
661: case OP_POSSTARI:
662: case OP_POSPLUSI:
663: case OP_POSQUERYI:
664: case OP_POSUPTOI:
665: case OP_NOTSTAR:
666: case OP_NOTMINSTAR:
667: case OP_NOTPLUS:
668: case OP_NOTMINPLUS:
669: case OP_NOTQUERY:
670: case OP_NOTMINQUERY:
671: case OP_NOTUPTO:
672: case OP_NOTMINUPTO:
673: case OP_NOTEXACT:
674: case OP_NOTPOSSTAR:
675: case OP_NOTPOSPLUS:
676: case OP_NOTPOSQUERY:
677: case OP_NOTPOSUPTO:
678: case OP_NOTSTARI:
679: case OP_NOTMINSTARI:
680: case OP_NOTPLUSI:
681: case OP_NOTMINPLUSI:
682: case OP_NOTQUERYI:
683: case OP_NOTMINQUERYI:
684: case OP_NOTUPTOI:
685: case OP_NOTMINUPTOI:
686: case OP_NOTEXACTI:
687: case OP_NOTPOSSTARI:
688: case OP_NOTPOSPLUSI:
689: case OP_NOTPOSQUERYI:
690: case OP_NOTPOSUPTOI:
691: cc += PRIV(OP_lengths)[*cc];
692: #ifdef SUPPORT_UTF
693: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
694: #endif
695: return cc;
696:
697: /* Special cases. */
698: case OP_TYPESTAR:
699: case OP_TYPEMINSTAR:
700: case OP_TYPEPLUS:
701: case OP_TYPEMINPLUS:
702: case OP_TYPEQUERY:
703: case OP_TYPEMINQUERY:
704: case OP_TYPEUPTO:
705: case OP_TYPEMINUPTO:
706: case OP_TYPEEXACT:
707: case OP_TYPEPOSSTAR:
708: case OP_TYPEPOSPLUS:
709: case OP_TYPEPOSQUERY:
710: case OP_TYPEPOSUPTO:
711: return cc + PRIV(OP_lengths)[*cc] - 1;
712:
713: case OP_ANYBYTE:
714: #ifdef SUPPORT_UTF
715: if (common->utf) return NULL;
716: #endif
717: return cc + 1;
718:
719: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
720: case OP_XCLASS:
721: return cc + GET(cc, 1);
722: #endif
723:
724: case OP_MARK:
725: case OP_PRUNE_ARG:
726: case OP_SKIP_ARG:
727: case OP_THEN_ARG:
728: return cc + 1 + 2 + cc[1];
729:
730: default:
731: /* All opcodes are supported now! */
732: SLJIT_ASSERT_STOP();
733: return NULL;
734: }
735: }
736:
737: static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
738: {
739: pcre_uchar *name;
740: pcre_uchar *name2;
741: unsigned int cbra_index;
742: int i;
743:
744: /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
745: while (cc < ccend)
746: {
747: switch(*cc)
748: {
749: case OP_SET_SOM:
750: common->has_set_som = TRUE;
751: cc += 1;
752: break;
753:
754: case OP_REF:
755: case OP_REFI:
756: common->optimized_cbracket[GET2(cc, 1)] = 0;
757: cc += 1 + IMM2_SIZE;
758: break;
759:
760: case OP_CBRAPOS:
761: case OP_SCBRAPOS:
762: common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
763: cc += 1 + LINK_SIZE + IMM2_SIZE;
764: break;
765:
766: case OP_COND:
767: case OP_SCOND:
768: /* Only AUTO_CALLOUT can insert this opcode. We do
769: not intend to support this case. */
770: if (cc[1 + LINK_SIZE] == OP_CALLOUT)
771: return FALSE;
772: cc += 1 + LINK_SIZE;
773: break;
774:
775: case OP_CREF:
776: i = GET2(cc, 1);
777: common->optimized_cbracket[i] = 0;
778: cc += 1 + IMM2_SIZE;
779: break;
780:
781: case OP_NCREF:
782: cbra_index = GET2(cc, 1);
783: name = (pcre_uchar *)common->name_table;
784: name2 = name;
785: for (i = 0; i < common->name_count; i++)
786: {
787: if (GET2(name, 0) == cbra_index) break;
788: name += common->name_entry_size;
789: }
790: SLJIT_ASSERT(i != common->name_count);
791:
792: for (i = 0; i < common->name_count; i++)
793: {
794: if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)
795: common->optimized_cbracket[GET2(name2, 0)] = 0;
796: name2 += common->name_entry_size;
797: }
798: cc += 1 + IMM2_SIZE;
799: break;
800:
801: case OP_RECURSE:
802: /* Set its value only once. */
803: if (common->recursive_head_ptr == 0)
804: {
805: common->recursive_head_ptr = common->ovector_start;
806: common->ovector_start += sizeof(sljit_sw);
807: }
808: cc += 1 + LINK_SIZE;
809: break;
810:
811: case OP_CALLOUT:
812: if (common->capture_last_ptr == 0)
813: {
814: common->capture_last_ptr = common->ovector_start;
815: common->ovector_start += sizeof(sljit_sw);
816: }
817: cc += 2 + 2 * LINK_SIZE;
818: break;
819:
820: case OP_THEN_ARG:
821: common->has_then = TRUE;
822: common->control_head_ptr = 1;
823: /* Fall through. */
824:
825: case OP_PRUNE_ARG:
826: common->needs_start_ptr = TRUE;
827: /* Fall through. */
828:
829: case OP_MARK:
830: if (common->mark_ptr == 0)
831: {
832: common->mark_ptr = common->ovector_start;
833: common->ovector_start += sizeof(sljit_sw);
834: }
835: cc += 1 + 2 + cc[1];
836: break;
837:
838: case OP_THEN:
839: common->has_then = TRUE;
840: common->control_head_ptr = 1;
841: /* Fall through. */
842:
843: case OP_PRUNE:
844: case OP_SKIP:
845: common->needs_start_ptr = TRUE;
846: cc += 1;
847: break;
848:
849: case OP_SKIP_ARG:
850: common->control_head_ptr = 1;
851: common->has_skip_arg = TRUE;
852: cc += 1 + 2 + cc[1];
853: break;
854:
855: default:
856: cc = next_opcode(common, cc);
857: if (cc == NULL)
858: return FALSE;
859: break;
860: }
861: }
862: return TRUE;
863: }
864:
865: static int get_class_iterator_size(pcre_uchar *cc)
866: {
867: switch(*cc)
868: {
869: case OP_CRSTAR:
870: case OP_CRPLUS:
871: return 2;
872:
873: case OP_CRMINSTAR:
874: case OP_CRMINPLUS:
875: case OP_CRQUERY:
876: case OP_CRMINQUERY:
877: return 1;
878:
879: case OP_CRRANGE:
880: case OP_CRMINRANGE:
881: if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
882: return 0;
883: return 2;
884:
885: default:
886: return 0;
887: }
888: }
889:
890: static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
891: {
892: pcre_uchar *end = bracketend(begin);
893: pcre_uchar *next;
894: pcre_uchar *next_end;
895: pcre_uchar *max_end;
896: pcre_uchar type;
897: sljit_sw length = end - begin;
898: int min, max, i;
899:
900: /* Detect fixed iterations first. */
901: if (end[-(1 + LINK_SIZE)] != OP_KET)
902: return FALSE;
903:
904: /* Already detected repeat. */
905: if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
906: return TRUE;
907:
908: next = end;
909: min = 1;
910: while (1)
911: {
912: if (*next != *begin)
913: break;
914: next_end = bracketend(next);
915: if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
916: break;
917: next = next_end;
918: min++;
919: }
920:
921: if (min == 2)
922: return FALSE;
923:
924: max = 0;
925: max_end = next;
926: if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
927: {
928: type = *next;
929: while (1)
930: {
931: if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
932: break;
933: next_end = bracketend(next + 2 + LINK_SIZE);
934: if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
935: break;
936: next = next_end;
937: max++;
938: }
939:
940: if (next[0] == type && next[1] == *begin && max >= 1)
941: {
942: next_end = bracketend(next + 1);
943: if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
944: {
945: for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
946: if (*next_end != OP_KET)
947: break;
948:
949: if (i == max)
950: {
951: common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
952: common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
953: /* +2 the original and the last. */
954: common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
955: if (min == 1)
956: return TRUE;
957: min--;
958: max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
959: }
960: }
961: }
962: }
963:
964: if (min >= 3)
965: {
966: common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
967: common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
968: common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
969: return TRUE;
970: }
971:
972: return FALSE;
973: }
974:
/* Groups of case labels for single character and character type iterator
   opcodes. Each macro expands to a run of `case X:` labels and is meant to
   be followed by the statements handling that group. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
  case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
  case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: case OP_PLUS: \
  case OP_STARI: case OP_PLUSI: \
  case OP_NOTSTAR: case OP_NOTPLUS: \
  case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: case OP_MINUPTO: \
  case OP_UPTOI: case OP_MINUPTOI: \
  case OP_NOTUPTO: case OP_NOTMINUPTO: \
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1026:
1027: static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1028: {
1029: pcre_uchar *cc = common->start;
1030: pcre_uchar *alternative;
1031: pcre_uchar *end = NULL;
1032: int private_data_ptr = *private_data_start;
1033: int space, size, bracketlen;
1034:
1035: while (cc < ccend)
1036: {
1037: space = 0;
1038: size = 0;
1039: bracketlen = 0;
1040: if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1041: return;
1042:
1043: if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1044: if (detect_repeat(common, cc))
1045: {
1046: /* These brackets are converted to repeats, so no global
1047: based single character repeat is allowed. */
1048: if (cc >= end)
1049: end = bracketend(cc);
1050: }
1051:
1052: switch(*cc)
1053: {
1054: case OP_KET:
1055: if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1056: {
1057: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1058: private_data_ptr += sizeof(sljit_sw);
1059: cc += common->private_data_ptrs[cc + 1 - common->start];
1060: }
1061: cc += 1 + LINK_SIZE;
1062: break;
1063:
1064: case OP_ASSERT:
1065: case OP_ASSERT_NOT:
1066: case OP_ASSERTBACK:
1067: case OP_ASSERTBACK_NOT:
1068: case OP_ONCE:
1069: case OP_ONCE_NC:
1070: case OP_BRAPOS:
1071: case OP_SBRA:
1072: case OP_SBRAPOS:
1073: case OP_SCOND:
1074: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1075: private_data_ptr += sizeof(sljit_sw);
1076: bracketlen = 1 + LINK_SIZE;
1077: break;
1078:
1079: case OP_CBRAPOS:
1080: case OP_SCBRAPOS:
1081: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1082: private_data_ptr += sizeof(sljit_sw);
1083: bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1084: break;
1085:
1086: case OP_COND:
1087: /* Might be a hidden SCOND. */
1088: alternative = cc + GET(cc, 1);
1089: if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1090: {
1091: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1092: private_data_ptr += sizeof(sljit_sw);
1093: }
1094: bracketlen = 1 + LINK_SIZE;
1095: break;
1096:
1097: case OP_BRA:
1098: bracketlen = 1 + LINK_SIZE;
1099: break;
1100:
1101: case OP_CBRA:
1102: case OP_SCBRA:
1103: bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1104: break;
1105:
1106: CASE_ITERATOR_PRIVATE_DATA_1
1107: space = 1;
1108: size = -2;
1109: break;
1110:
1111: CASE_ITERATOR_PRIVATE_DATA_2A
1112: space = 2;
1113: size = -2;
1114: break;
1115:
1116: CASE_ITERATOR_PRIVATE_DATA_2B
1117: space = 2;
1118: size = -(2 + IMM2_SIZE);
1119: break;
1120:
1121: CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1122: space = 1;
1123: size = 1;
1124: break;
1125:
1126: CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1127: if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1128: space = 2;
1129: size = 1;
1130: break;
1131:
1132: CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1133: if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1134: space = 2;
1135: size = 1 + IMM2_SIZE;
1136: break;
1137:
1138: case OP_CLASS:
1139: case OP_NCLASS:
1140: size += 1 + 32 / sizeof(pcre_uchar);
1141: space = get_class_iterator_size(cc + size);
1142: break;
1143:
1144: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1145: case OP_XCLASS:
1146: size = GET(cc, 1);
1147: space = get_class_iterator_size(cc + size);
1148: break;
1149: #endif
1150:
1151: default:
1152: cc = next_opcode(common, cc);
1153: SLJIT_ASSERT(cc != NULL);
1154: break;
1155: }
1156:
1157: /* Character iterators, which are not inside a repeated bracket,
1158: gets a private slot instead of allocating it on the stack. */
1159: if (space > 0 && cc >= end)
1160: {
1161: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1162: private_data_ptr += sizeof(sljit_sw) * space;
1163: }
1164:
1165: if (size != 0)
1166: {
1167: if (size < 0)
1168: {
1169: cc += -size;
1170: #ifdef SUPPORT_UTF
1171: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1172: #endif
1173: }
1174: else
1175: cc += size;
1176: }
1177:
1178: if (bracketlen > 0)
1179: {
1180: if (cc >= end)
1181: {
1182: end = bracketend(cc);
1183: if (end[-1 - LINK_SIZE] == OP_KET)
1184: end = NULL;
1185: }
1186: cc += bracketlen;
1187: }
1188: }
1189: *private_data_start = private_data_ptr;
1190: }
1191:
1192: /* Returns with a frame_types (always < 0) if no need for frame. */
1193: static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1194: {
1195: int length = 0;
1196: int possessive = 0;
1197: BOOL stack_restore = FALSE;
1198: BOOL setsom_found = recursive;
1199: BOOL setmark_found = recursive;
1200: /* The last capture is a local variable even for recursions. */
1201: BOOL capture_last_found = FALSE;
1202:
1203: #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1204: SLJIT_ASSERT(common->control_head_ptr != 0);
1205: *needs_control_head = TRUE;
1206: #else
1207: *needs_control_head = FALSE;
1208: #endif
1209:
1210: if (ccend == NULL)
1211: {
1212: ccend = bracketend(cc) - (1 + LINK_SIZE);
1213: if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1214: {
1215: possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1216: /* This is correct regardless of common->capture_last_ptr. */
1217: capture_last_found = TRUE;
1218: }
1219: cc = next_opcode(common, cc);
1220: }
1221:
1222: SLJIT_ASSERT(cc != NULL);
1223: while (cc < ccend)
1224: switch(*cc)
1225: {
1226: case OP_SET_SOM:
1227: SLJIT_ASSERT(common->has_set_som);
1228: stack_restore = TRUE;
1229: if (!setsom_found)
1230: {
1231: length += 2;
1232: setsom_found = TRUE;
1233: }
1234: cc += 1;
1235: break;
1236:
1237: case OP_MARK:
1238: case OP_PRUNE_ARG:
1239: case OP_THEN_ARG:
1240: SLJIT_ASSERT(common->mark_ptr != 0);
1241: stack_restore = TRUE;
1242: if (!setmark_found)
1243: {
1244: length += 2;
1245: setmark_found = TRUE;
1246: }
1247: if (common->control_head_ptr != 0)
1248: *needs_control_head = TRUE;
1249: cc += 1 + 2 + cc[1];
1250: break;
1251:
1252: case OP_RECURSE:
1253: stack_restore = TRUE;
1254: if (common->has_set_som && !setsom_found)
1255: {
1256: length += 2;
1257: setsom_found = TRUE;
1258: }
1259: if (common->mark_ptr != 0 && !setmark_found)
1260: {
1261: length += 2;
1262: setmark_found = TRUE;
1263: }
1264: if (common->capture_last_ptr != 0 && !capture_last_found)
1265: {
1266: length += 2;
1267: capture_last_found = TRUE;
1268: }
1269: cc += 1 + LINK_SIZE;
1270: break;
1271:
1272: case OP_CBRA:
1273: case OP_CBRAPOS:
1274: case OP_SCBRA:
1275: case OP_SCBRAPOS:
1276: stack_restore = TRUE;
1277: if (common->capture_last_ptr != 0 && !capture_last_found)
1278: {
1279: length += 2;
1280: capture_last_found = TRUE;
1281: }
1282: length += 3;
1283: cc += 1 + LINK_SIZE + IMM2_SIZE;
1284: break;
1285:
1286: default:
1287: stack_restore = TRUE;
1288: /* Fall through. */
1289:
1290: case OP_NOT_WORD_BOUNDARY:
1291: case OP_WORD_BOUNDARY:
1292: case OP_NOT_DIGIT:
1293: case OP_DIGIT:
1294: case OP_NOT_WHITESPACE:
1295: case OP_WHITESPACE:
1296: case OP_NOT_WORDCHAR:
1297: case OP_WORDCHAR:
1298: case OP_ANY:
1299: case OP_ALLANY:
1300: case OP_ANYBYTE:
1301: case OP_NOTPROP:
1302: case OP_PROP:
1303: case OP_ANYNL:
1304: case OP_NOT_HSPACE:
1305: case OP_HSPACE:
1306: case OP_NOT_VSPACE:
1307: case OP_VSPACE:
1308: case OP_EXTUNI:
1309: case OP_EODN:
1310: case OP_EOD:
1311: case OP_CIRC:
1312: case OP_CIRCM:
1313: case OP_DOLL:
1314: case OP_DOLLM:
1315: case OP_CHAR:
1316: case OP_CHARI:
1317: case OP_NOT:
1318: case OP_NOTI:
1319:
1320: case OP_EXACT:
1321: case OP_POSSTAR:
1322: case OP_POSPLUS:
1323: case OP_POSQUERY:
1324: case OP_POSUPTO:
1325:
1326: case OP_EXACTI:
1327: case OP_POSSTARI:
1328: case OP_POSPLUSI:
1329: case OP_POSQUERYI:
1330: case OP_POSUPTOI:
1331:
1332: case OP_NOTEXACT:
1333: case OP_NOTPOSSTAR:
1334: case OP_NOTPOSPLUS:
1335: case OP_NOTPOSQUERY:
1336: case OP_NOTPOSUPTO:
1337:
1338: case OP_NOTEXACTI:
1339: case OP_NOTPOSSTARI:
1340: case OP_NOTPOSPLUSI:
1341: case OP_NOTPOSQUERYI:
1342: case OP_NOTPOSUPTOI:
1343:
1344: case OP_TYPEEXACT:
1345: case OP_TYPEPOSSTAR:
1346: case OP_TYPEPOSPLUS:
1347: case OP_TYPEPOSQUERY:
1348: case OP_TYPEPOSUPTO:
1349:
1350: case OP_CLASS:
1351: case OP_NCLASS:
1352: case OP_XCLASS:
1353:
1354: cc = next_opcode(common, cc);
1355: SLJIT_ASSERT(cc != NULL);
1356: break;
1357: }
1358:
1359: /* Possessive quantifiers can use a special case. */
1360: if (SLJIT_UNLIKELY(possessive == length))
1361: return stack_restore ? no_frame : no_stack;
1362:
1363: if (length > 0)
1364: return length + 1;
1365: return stack_restore ? no_frame : no_stack;
1366: }
1367:
1368: static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1369: {
1370: DEFINE_COMPILER;
1371: BOOL setsom_found = recursive;
1372: BOOL setmark_found = recursive;
1373: /* The last capture is a local variable even for recursions. */
1374: BOOL capture_last_found = FALSE;
1375: int offset;
1376:
1377: /* >= 1 + shortest item size (2) */
1378: SLJIT_UNUSED_ARG(stacktop);
1379: SLJIT_ASSERT(stackpos >= stacktop + 2);
1380:
1381: stackpos = STACK(stackpos);
1382: if (ccend == NULL)
1383: {
1384: ccend = bracketend(cc) - (1 + LINK_SIZE);
1385: if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1386: cc = next_opcode(common, cc);
1387: }
1388:
1389: SLJIT_ASSERT(cc != NULL);
1390: while (cc < ccend)
1391: switch(*cc)
1392: {
1393: case OP_SET_SOM:
1394: SLJIT_ASSERT(common->has_set_som);
1395: if (!setsom_found)
1396: {
1397: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1398: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1399: stackpos += (int)sizeof(sljit_sw);
1400: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1401: stackpos += (int)sizeof(sljit_sw);
1402: setsom_found = TRUE;
1403: }
1404: cc += 1;
1405: break;
1406:
1407: case OP_MARK:
1408: case OP_PRUNE_ARG:
1409: case OP_THEN_ARG:
1410: SLJIT_ASSERT(common->mark_ptr != 0);
1411: if (!setmark_found)
1412: {
1413: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1414: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1415: stackpos += (int)sizeof(sljit_sw);
1416: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1417: stackpos += (int)sizeof(sljit_sw);
1418: setmark_found = TRUE;
1419: }
1420: cc += 1 + 2 + cc[1];
1421: break;
1422:
1423: case OP_RECURSE:
1424: if (common->has_set_som && !setsom_found)
1425: {
1426: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1427: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1428: stackpos += (int)sizeof(sljit_sw);
1429: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1430: stackpos += (int)sizeof(sljit_sw);
1431: setsom_found = TRUE;
1432: }
1433: if (common->mark_ptr != 0 && !setmark_found)
1434: {
1435: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1436: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1437: stackpos += (int)sizeof(sljit_sw);
1438: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1439: stackpos += (int)sizeof(sljit_sw);
1440: setmark_found = TRUE;
1441: }
1442: if (common->capture_last_ptr != 0 && !capture_last_found)
1443: {
1444: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1445: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1446: stackpos += (int)sizeof(sljit_sw);
1447: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448: stackpos += (int)sizeof(sljit_sw);
1449: capture_last_found = TRUE;
1450: }
1451: cc += 1 + LINK_SIZE;
1452: break;
1453:
1454: case OP_CBRA:
1455: case OP_CBRAPOS:
1456: case OP_SCBRA:
1457: case OP_SCBRAPOS:
1458: if (common->capture_last_ptr != 0 && !capture_last_found)
1459: {
1460: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1461: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1462: stackpos += (int)sizeof(sljit_sw);
1463: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464: stackpos += (int)sizeof(sljit_sw);
1465: capture_last_found = TRUE;
1466: }
1467: offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1468: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1469: stackpos += (int)sizeof(sljit_sw);
1470: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1471: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1472: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473: stackpos += (int)sizeof(sljit_sw);
1474: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1475: stackpos += (int)sizeof(sljit_sw);
1476:
1477: cc += 1 + LINK_SIZE + IMM2_SIZE;
1478: break;
1479:
1480: default:
1481: cc = next_opcode(common, cc);
1482: SLJIT_ASSERT(cc != NULL);
1483: break;
1484: }
1485:
1486: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1487: SLJIT_ASSERT(stackpos == STACK(stacktop));
1488: }
1489:
1490: static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1491: {
1492: int private_data_length = needs_control_head ? 3 : 2;
1493: int size;
1494: pcre_uchar *alternative;
1495: /* Calculate the sum of the private machine words. */
1496: while (cc < ccend)
1497: {
1498: size = 0;
1499: switch(*cc)
1500: {
1501: case OP_KET:
1502: if (PRIVATE_DATA(cc) != 0)
1503: private_data_length++;
1504: cc += 1 + LINK_SIZE;
1505: break;
1506:
1507: case OP_ASSERT:
1508: case OP_ASSERT_NOT:
1509: case OP_ASSERTBACK:
1510: case OP_ASSERTBACK_NOT:
1511: case OP_ONCE:
1512: case OP_ONCE_NC:
1513: case OP_BRAPOS:
1514: case OP_SBRA:
1515: case OP_SBRAPOS:
1516: case OP_SCOND:
1517: private_data_length++;
1518: cc += 1 + LINK_SIZE;
1519: break;
1520:
1521: case OP_CBRA:
1522: case OP_SCBRA:
1523: if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1524: private_data_length++;
1525: cc += 1 + LINK_SIZE + IMM2_SIZE;
1526: break;
1527:
1528: case OP_CBRAPOS:
1529: case OP_SCBRAPOS:
1530: private_data_length += 2;
1531: cc += 1 + LINK_SIZE + IMM2_SIZE;
1532: break;
1533:
1534: case OP_COND:
1535: /* Might be a hidden SCOND. */
1536: alternative = cc + GET(cc, 1);
1537: if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1538: private_data_length++;
1539: cc += 1 + LINK_SIZE;
1540: break;
1541:
1542: CASE_ITERATOR_PRIVATE_DATA_1
1543: if (PRIVATE_DATA(cc))
1544: private_data_length++;
1545: cc += 2;
1546: #ifdef SUPPORT_UTF
1547: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1548: #endif
1549: break;
1550:
1551: CASE_ITERATOR_PRIVATE_DATA_2A
1552: if (PRIVATE_DATA(cc))
1553: private_data_length += 2;
1554: cc += 2;
1555: #ifdef SUPPORT_UTF
1556: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1557: #endif
1558: break;
1559:
1560: CASE_ITERATOR_PRIVATE_DATA_2B
1561: if (PRIVATE_DATA(cc))
1562: private_data_length += 2;
1563: cc += 2 + IMM2_SIZE;
1564: #ifdef SUPPORT_UTF
1565: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1566: #endif
1567: break;
1568:
1569: CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1570: if (PRIVATE_DATA(cc))
1571: private_data_length++;
1572: cc += 1;
1573: break;
1574:
1575: CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1576: if (PRIVATE_DATA(cc))
1577: private_data_length += 2;
1578: cc += 1;
1579: break;
1580:
1581: CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1582: if (PRIVATE_DATA(cc))
1583: private_data_length += 2;
1584: cc += 1 + IMM2_SIZE;
1585: break;
1586:
1587: case OP_CLASS:
1588: case OP_NCLASS:
1589: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1590: case OP_XCLASS:
1591: size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1592: #else
1593: size = 1 + 32 / (int)sizeof(pcre_uchar);
1594: #endif
1595: if (PRIVATE_DATA(cc))
1596: private_data_length += get_class_iterator_size(cc + size);
1597: cc += size;
1598: break;
1599:
1600: default:
1601: cc = next_opcode(common, cc);
1602: SLJIT_ASSERT(cc != NULL);
1603: break;
1604: }
1605: }
1606: SLJIT_ASSERT(cc == ccend);
1607: return private_data_length;
1608: }
1609:
/* Emits the code which copies the private data words belonging to the byte
code between cc and ccend either from the locals to the stack (save is TRUE)
or from the stack back to the locals (save is FALSE).

Arguments:
  common              compiler state
  cc, ccend           byte code range; the scan must stop exactly at ccend
  save                TRUE: locals -> stack, FALSE: stack -> locals
  stackptr, stacktop  stack slot indexes of the area used for the copy
  needs_control_head  TRUE if common->control_head_ptr is copied as well

The values travel through TMP1 and TMP2, which are used alternately
(tmp1next selects the register of the next word), so a load and the
matching store are always separated by the handling of another word. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Offsets (relative to SLJIT_LOCALS_REG) of the locals found in one step. */
int srcw[2];
int count, size;
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
/* start: emit the head pointers first (save only), loop: scan the byte
code, end: everything has been processed. */
enum {
  start,
  loop,
  end
} status;

status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* The one or two words stored by the "start" step of the save pass are
  not restored here. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  /* Preload up to two words, so the loop below can store first and
  reload afterwards. */
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  /* Each step collects 0, 1 or 2 local offsets in srcw. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    switch(*cc)
      {
      case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      /* Character iterators are copied only if a private slot was assigned
      to them. Two word iterators occupy two consecutive words. */
      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The iterator following the class decides the number of words. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    SLJIT_ASSERT_STOP();
    break;
    }

  /* The collected words are processed from the highest index down to
  srcw[0]. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Flush the previous content of the selected register to the stack
      (if there is any), then load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Store the preloaded register into the local, then refill the
      register from the stack unless the stack area is exhausted. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the words still held in the registers. The register which would
  have been used next holds the older word, so it is written first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
/* The whole byte code range and the whole stack area must be consumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1929:
1930: static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1931: {
1932: pcre_uchar *end = bracketend(cc);
1933: BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1934:
1935: /* Assert captures then. */
1936: if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1937: current_offset = NULL;
1938: /* Conditional block does not. */
1939: if (*cc == OP_COND || *cc == OP_SCOND)
1940: has_alternatives = FALSE;
1941:
1942: cc = next_opcode(common, cc);
1943: if (has_alternatives)
1944: current_offset = common->then_offsets + (cc - common->start);
1945:
1946: while (cc < end)
1947: {
1948: if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1949: cc = set_then_offsets(common, cc, current_offset);
1950: else
1951: {
1952: if (*cc == OP_ALT && has_alternatives)
1953: current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1954: if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1955: *current_offset = 1;
1956: cc = next_opcode(common, cc);
1957: }
1958: }
1959:
1960: return end;
1961: }
1962:
1963: #undef CASE_ITERATOR_PRIVATE_DATA_1
1964: #undef CASE_ITERATOR_PRIVATE_DATA_2A
1965: #undef CASE_ITERATOR_PRIVATE_DATA_2B
1966: #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1967: #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1968: #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1969:
/* Returns non-zero if at most one bit is set in value. Note that zero is
accepted as well, not only the true powers of two. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit leaves nothing behind in that case. */
unsigned int remaining_bits = value & (value - 1);

return remaining_bits == 0;
}
1974:
1975: static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1976: {
1977: while (list)
1978: {
1979: /* sljit_set_label is clever enough to do nothing
1980: if either the jump or the label is NULL. */
1981: SET_LABEL(list->jump, label);
1982: list = list->next;
1983: }
1984: }
1985:
1986: static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1987: {
1988: jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1989: if (list_item)
1990: {
1991: list_item->next = *list;
1992: list_item->jump = jump;
1993: *list = list_item;
1994: }
1995: }
1996:
1997: static void add_stub(compiler_common *common, struct sljit_jump *start)
1998: {
1999: DEFINE_COMPILER;
2000: stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2001:
2002: if (list_item)
2003: {
2004: list_item->start = start;
2005: list_item->quit = LABEL();
2006: list_item->next = common->stubs;
2007: common->stubs = list_item;
2008: }
2009: }
2010:
2011: static void flush_stubs(compiler_common *common)
2012: {
2013: DEFINE_COMPILER;
2014: stub_list* list_item = common->stubs;
2015:
2016: while (list_item)
2017: {
2018: JUMPHERE(list_item->start);
2019: add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2020: JUMPTO(SLJIT_JUMP, list_item->quit);
2021: list_item = list_item->next;
2022: }
2023: common->stubs = NULL;
2024: }
2025:
2026: static SLJIT_INLINE void count_match(compiler_common *common)
2027: {
2028: DEFINE_COMPILER;
2029:
2030: OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2031: add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2032: }
2033:
2034: static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2035: {
2036: /* May destroy all locals and registers except TMP2. */
2037: DEFINE_COMPILER;
2038:
2039: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2040: #ifdef DESTROY_REGISTERS
2041: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2042: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2043: OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2044: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2045: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2046: #endif
2047: add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2048: }
2049:
2050: static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2051: {
2052: DEFINE_COMPILER;
2053: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2054: }
2055:
2056: static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2057: {
2058: DEFINE_COMPILER;
2059: struct sljit_label *loop;
2060: int i;
2061:
2062: /* At this point we can freely use all temporary registers. */
2063: SLJIT_ASSERT(length > 1);
2064: /* TMP1 returns with begin - 1. */
2065: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2066: if (length < 8)
2067: {
2068: for (i = 1; i < length; i++)
2069: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2070: }
2071: else
2072: {
2073: GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2074: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2075: loop = LABEL();
2076: OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2077: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2078: JUMPTO(SLJIT_C_NOT_ZERO, loop);
2079: }
2080: }
2081:
2082: static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2083: {
2084: DEFINE_COMPILER;
2085: struct sljit_label *loop;
2086: int i;
2087:
2088: SLJIT_ASSERT(length > 1);
2089: /* OVECTOR(1) contains the "string begin - 1" constant. */
2090: if (length > 2)
2091: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2092: if (length < 8)
2093: {
2094: for (i = 2; i < length; i++)
2095: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2096: }
2097: else
2098: {
2099: GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2100: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2101: loop = LABEL();
2102: OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2103: OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2104: JUMPTO(SLJIT_C_NOT_ZERO, loop);
2105: }
2106:
2107: OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2108: if (common->mark_ptr != 0)
2109: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2110: if (common->control_head_ptr != 0)
2111: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2112: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2113: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2114: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2115: }
2116:
2117: static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2118: {
2119: while (current != NULL)
2120: {
2121: switch (current[-2])
2122: {
2123: case type_then_trap:
2124: break;
2125:
2126: case type_mark:
2127: if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2128: return current[-4];
2129: break;
2130:
2131: default:
2132: SLJIT_ASSERT_STOP();
2133: break;
2134: }
2135: current = (sljit_sw*)current[-1];
2136: }
2137: return -1;
2138: }
2139:
2140: static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2141: {
2142: DEFINE_COMPILER;
2143: struct sljit_label *loop;
2144: struct sljit_jump *early_quit;
2145:
2146: /* At this point we can freely use all registers. */
2147: OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2148: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2149:
2150: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2151: if (common->mark_ptr != 0)
2152: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2153: OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2154: if (common->mark_ptr != 0)
2155: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2156: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2157: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2158: GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2159: /* Unlikely, but possible */
2160: early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2161: loop = LABEL();
2162: OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2163: OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2164: /* Copy the integer value to the output buffer */
2165: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2166: OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2167: #endif
2168: OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2169: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2170: JUMPTO(SLJIT_C_NOT_ZERO, loop);
2171: JUMPHERE(early_quit);
2172:
2173: /* Calculate the return value, which is the maximum ovector value. */
2174: if (topbracket > 1)
2175: {
2176: GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2177: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2178:
2179: /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2180: loop = LABEL();
2181: OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2182: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2183: CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2184: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2185: }
2186: else
2187: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2188: }
2189:
2190: static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2191: {
2192: DEFINE_COMPILER;
2193: struct sljit_jump *jump;
2194:
2195: SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2196: SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2197: && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2198:
2199: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2200: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2201: OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2202: CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2203:
2204: /* Store match begin and end. */
2205: OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2206: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2207:
2208: jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2209: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2210: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2211: OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2212: #endif
2213: OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2214: JUMPHERE(jump);
2215:
2216: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2217: OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2218: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2219: OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2220: #endif
2221: OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2222:
2223: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2224: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2225: OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2226: #endif
2227: OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2228:
2229: JUMPTO(SLJIT_JUMP, quit);
2230: }
2231:
2232: static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2233: {
2234: /* May destroy TMP1. */
2235: DEFINE_COMPILER;
2236: struct sljit_jump *jump;
2237:
2238: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2239: {
2240: /* The value of -1 must be kept for start_used_ptr! */
2241: OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2242: /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2243: is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2244: jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2245: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2246: JUMPHERE(jump);
2247: }
2248: else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2249: {
2250: jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2252: JUMPHERE(jump);
2253: }
2254: }
2255:
2256: static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2257: {
2258: /* Detects if the character has an othercase. */
2259: unsigned int c;
2260:
2261: #ifdef SUPPORT_UTF
2262: if (common->utf)
2263: {
2264: GETCHAR(c, cc);
2265: if (c > 127)
2266: {
2267: #ifdef SUPPORT_UCP
2268: return c != UCD_OTHERCASE(c);
2269: #else
2270: return FALSE;
2271: #endif
2272: }
2273: #ifndef COMPILE_PCRE8
2274: return common->fcc[c] != c;
2275: #endif
2276: }
2277: else
2278: #endif
2279: c = *cc;
2280: return MAX_255(c) ? common->fcc[c] != c : FALSE;
2281: }
2282:
2283: static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2284: {
2285: /* Returns with the othercase. */
2286: #ifdef SUPPORT_UTF
2287: if (common->utf && c > 127)
2288: {
2289: #ifdef SUPPORT_UCP
2290: return UCD_OTHERCASE(c);
2291: #else
2292: return c;
2293: #endif
2294: }
2295: #endif
2296: return TABLE_GET(c, common->fcc, c);
2297: }
2298:
2299: static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2300: {
2301: /* Detects if the character and its othercase has only 1 bit difference. */
2302: unsigned int c, oc, bit;
2303: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2304: int n;
2305: #endif
2306:
2307: #ifdef SUPPORT_UTF
2308: if (common->utf)
2309: {
2310: GETCHAR(c, cc);
2311: if (c <= 127)
2312: oc = common->fcc[c];
2313: else
2314: {
2315: #ifdef SUPPORT_UCP
2316: oc = UCD_OTHERCASE(c);
2317: #else
2318: oc = c;
2319: #endif
2320: }
2321: }
2322: else
2323: {
2324: c = *cc;
2325: oc = TABLE_GET(c, common->fcc, c);
2326: }
2327: #else
2328: c = *cc;
2329: oc = TABLE_GET(c, common->fcc, c);
2330: #endif
2331:
2332: SLJIT_ASSERT(c != oc);
2333:
2334: bit = c ^ oc;
2335: /* Optimized for English alphabet. */
2336: if (c <= 127 && bit == 0x20)
2337: return (0 << 8) | 0x20;
2338:
2339: /* Since c != oc, they must have at least 1 bit difference. */
2340: if (!is_powerof2(bit))
2341: return 0;
2342:
2343: #if defined COMPILE_PCRE8
2344:
2345: #ifdef SUPPORT_UTF
2346: if (common->utf && c > 127)
2347: {
2348: n = GET_EXTRALEN(*cc);
2349: while ((bit & 0x3f) == 0)
2350: {
2351: n--;
2352: bit >>= 6;
2353: }
2354: return (n << 8) | bit;
2355: }
2356: #endif /* SUPPORT_UTF */
2357: return (0 << 8) | bit;
2358:
2359: #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2360:
2361: #ifdef SUPPORT_UTF
2362: if (common->utf && c > 65535)
2363: {
2364: if (bit >= (1 << 10))
2365: bit >>= 10;
2366: else
2367: return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2368: }
2369: #endif /* SUPPORT_UTF */
2370: return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2371:
2372: #endif /* COMPILE_PCRE[8|16|32] */
2373: }
2374:
2375: static void check_partial(compiler_common *common, BOOL force)
2376: {
2377: /* Checks whether a partial matching is occurred. Does not modify registers. */
2378: DEFINE_COMPILER;
2379: struct sljit_jump *jump = NULL;
2380:
2381: SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2382:
2383: if (common->mode == JIT_COMPILE)
2384: return;
2385:
2386: if (!force)
2387: jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2388: else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2389: jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2390:
2391: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2392: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2393: else
2394: {
2395: if (common->partialmatchlabel != NULL)
2396: JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2397: else
2398: add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2399: }
2400:
2401: if (jump != NULL)
2402: JUMPHERE(jump);
2403: }
2404:
2405: static void check_str_end(compiler_common *common, jump_list **end_reached)
2406: {
2407: /* Does not affect registers. Usually used in a tight spot. */
2408: DEFINE_COMPILER;
2409: struct sljit_jump *jump;
2410:
2411: if (common->mode == JIT_COMPILE)
2412: {
2413: add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2414: return;
2415: }
2416:
2417: jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2418: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2419: {
2420: add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2421: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2422: add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2423: }
2424: else
2425: {
2426: add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2427: if (common->partialmatchlabel != NULL)
2428: JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2429: else
2430: add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2431: }
2432: JUMPHERE(jump);
2433: }
2434:
2435: static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2436: {
2437: DEFINE_COMPILER;
2438: struct sljit_jump *jump;
2439:
2440: if (common->mode == JIT_COMPILE)
2441: {
2442: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2443: return;
2444: }
2445:
2446: /* Partial matching mode. */
2447: jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2448: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2449: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450: {
2451: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2452: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2453: }
2454: else
2455: {
2456: if (common->partialmatchlabel != NULL)
2457: JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458: else
2459: add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460: }
2461: JUMPHERE(jump);
2462: }
2463:
2464: static void read_char(compiler_common *common)
2465: {
2466: /* Reads the character into TMP1, updates STR_PTR.
2467: Does not check STR_END. TMP2 Destroyed. */
2468: DEFINE_COMPILER;
2469: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2470: struct sljit_jump *jump;
2471: #endif
2472:
2473: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2474: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475: if (common->utf)
2476: {
2477: #if defined COMPILE_PCRE8
2478: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2479: #elif defined COMPILE_PCRE16
2480: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2481: #endif /* COMPILE_PCRE[8|16] */
2482: add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2483: JUMPHERE(jump);
2484: }
2485: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2486: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2487: }
2488:
2489: static void peek_char(compiler_common *common)
2490: {
2491: /* Reads the character into TMP1, keeps STR_PTR.
2492: Does not check STR_END. TMP2 Destroyed. */
2493: DEFINE_COMPILER;
2494: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2495: struct sljit_jump *jump;
2496: #endif
2497:
2498: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2499: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2500: if (common->utf)
2501: {
2502: #if defined COMPILE_PCRE8
2503: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2504: #elif defined COMPILE_PCRE16
2505: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2506: #endif /* COMPILE_PCRE[8|16] */
2507: add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2508: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2509: JUMPHERE(jump);
2510: }
2511: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2512: }
2513:
2514: static void read_char8_type(compiler_common *common)
2515: {
2516: /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2517: DEFINE_COMPILER;
2518: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2519: struct sljit_jump *jump;
2520: #endif
2521:
2522: #ifdef SUPPORT_UTF
2523: if (common->utf)
2524: {
2525: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2526: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2527: #if defined COMPILE_PCRE8
2528: /* This can be an extra read in some situations, but hopefully
2529: it is needed in most cases. */
2530: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2531: jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2532: add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2533: JUMPHERE(jump);
2534: #elif defined COMPILE_PCRE16
2535: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2536: jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2537: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2538: JUMPHERE(jump);
2539: /* Skip low surrogate if necessary. */
2540: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2541: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2542: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2543: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2544: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2545: #elif defined COMPILE_PCRE32
2546: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2547: jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2548: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2549: JUMPHERE(jump);
2550: #endif /* COMPILE_PCRE[8|16|32] */
2551: return;
2552: }
2553: #endif /* SUPPORT_UTF */
2554: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2555: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2556: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2557: /* The ctypes array contains only 256 values. */
2558: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2559: jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2560: #endif
2561: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2562: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2563: JUMPHERE(jump);
2564: #endif
2565: }
2566:
2567: static void skip_char_back(compiler_common *common)
2568: {
2569: /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2570: DEFINE_COMPILER;
2571: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2572: #if defined COMPILE_PCRE8
2573: struct sljit_label *label;
2574:
2575: if (common->utf)
2576: {
2577: label = LABEL();
2578: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2579: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2580: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2581: CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2582: return;
2583: }
2584: #elif defined COMPILE_PCRE16
2585: if (common->utf)
2586: {
2587: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2588: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2589: /* Skip low surrogate if necessary. */
2590: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2591: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2592: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2593: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2594: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2595: return;
2596: }
2597: #endif /* COMPILE_PCRE[8|16] */
2598: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2599: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600: }
2601:
2602: static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2603: {
2604: /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2605: DEFINE_COMPILER;
2606:
2607: if (nltype == NLTYPE_ANY)
2608: {
2609: add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2610: add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2611: }
2612: else if (nltype == NLTYPE_ANYCRLF)
2613: {
2614: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2615: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2616: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2617: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2618: add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2619: }
2620: else
2621: {
2622: SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2623: add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2624: }
2625: }
2626:
2627: #ifdef SUPPORT_UTF
2628:
2629: #if defined COMPILE_PCRE8
2630: static void do_utfreadchar(compiler_common *common)
2631: {
2632: /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2633: of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2634: DEFINE_COMPILER;
2635: struct sljit_jump *jump;
2636:
2637: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2638: /* Searching for the first zero. */
2639: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2640: jump = JUMP(SLJIT_C_NOT_ZERO);
2641: /* Two byte sequence. */
2642: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2643: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2644: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2645: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2646: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2647: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2648: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2649: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2650: JUMPHERE(jump);
2651:
2652: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2653: jump = JUMP(SLJIT_C_NOT_ZERO);
2654: /* Three byte sequence. */
2655: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2656: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2657: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2658: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2660: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2662: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2663: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2664: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2665: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2666: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2667: JUMPHERE(jump);
2668:
2669: /* Four byte sequence. */
2670: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2671: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2672: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2673: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2674: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2675: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2676: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2677: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2678: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2679: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2681: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2682: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2683: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2684: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2685: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2686: }
2687:
2688: static void do_utfreadtype8(compiler_common *common)
2689: {
2690: /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2691: of the character (>= 0xc0). Return value in TMP1. */
2692: DEFINE_COMPILER;
2693: struct sljit_jump *jump;
2694: struct sljit_jump *compare;
2695:
2696: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2697:
2698: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2699: jump = JUMP(SLJIT_C_NOT_ZERO);
2700: /* Two byte sequence. */
2701: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2702: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2703: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2704: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2705: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2706: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2707: compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2708: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2709: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2710:
2711: JUMPHERE(compare);
2712: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2713: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2714: JUMPHERE(jump);
2715:
2716: /* We only have types for characters less than 256. */
2717: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2718: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2719: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2720: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2721: }
2722:
2723: #elif defined COMPILE_PCRE16
2724:
2725: static void do_utfreadchar(compiler_common *common)
2726: {
2727: /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2728: of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2729: DEFINE_COMPILER;
2730: struct sljit_jump *jump;
2731:
2732: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2733: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2734: /* Do nothing, only return. */
2735: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2736:
2737: JUMPHERE(jump);
2738: /* Combine two 16 bit characters. */
2739: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2740: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2741: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2742: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2743: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2744: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2745: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2746: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2747: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2748: }
2749:
2750: #endif /* COMPILE_PCRE[8|16] */
2751:
2752: #endif /* SUPPORT_UTF */
2753:
2754: #ifdef SUPPORT_UCP
2755:
2756: /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2757: #define UCD_BLOCK_MASK 127
2758: #define UCD_BLOCK_SHIFT 7
2759:
2760: static void do_getucd(compiler_common *common)
2761: {
2762: /* Search the UCD record for the character comes in TMP1.
2763: Returns chartype in TMP1 and UCD offset in TMP2. */
2764: DEFINE_COMPILER;
2765:
2766: SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2767:
2768: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2769: OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2770: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2771: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2772: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2773: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2774: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2775: OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2776: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2777: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2778: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2779: }
2780: #endif
2781:
2782: static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2783: {
2784: DEFINE_COMPILER;
2785: struct sljit_label *mainloop;
2786: struct sljit_label *newlinelabel = NULL;
2787: struct sljit_jump *start;
2788: struct sljit_jump *end = NULL;
2789: struct sljit_jump *nl = NULL;
2790: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2791: struct sljit_jump *singlechar;
2792: #endif
2793: jump_list *newline = NULL;
2794: BOOL newlinecheck = FALSE;
2795: BOOL readuchar = FALSE;
2796:
2797: if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2798: common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2799: newlinecheck = TRUE;
2800:
2801: if (firstline)
2802: {
2803: /* Search for the end of the first line. */
2804: SLJIT_ASSERT(common->first_line_end != 0);
2805: OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2806:
2807: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2808: {
2809: mainloop = LABEL();
2810: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2811: end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2812: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2813: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2814: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2815: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2816: JUMPHERE(end);
2817: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2818: }
2819: else
2820: {
2821: end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2822: mainloop = LABEL();
2823: /* Continual stores does not cause data dependency. */
2824: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2825: read_char(common);
2826: check_newlinechar(common, common->nltype, &newline, TRUE);
2827: CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2828: JUMPHERE(end);
2829: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2830: set_jumps(newline, LABEL());
2831: }
2832:
2833: OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2834: }
2835:
2836: start = JUMP(SLJIT_JUMP);
2837:
2838: if (newlinecheck)
2839: {
2840: newlinelabel = LABEL();
2841: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2842: end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2843: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2844: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2845: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2846: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2847: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2848: #endif
2849: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2850: nl = JUMP(SLJIT_JUMP);
2851: }
2852:
2853: mainloop = LABEL();
2854:
2855: /* Increasing the STR_PTR here requires one less jump in the most common case. */
2856: #ifdef SUPPORT_UTF
2857: if (common->utf) readuchar = TRUE;
2858: #endif
2859: if (newlinecheck) readuchar = TRUE;
2860:
2861: if (readuchar)
2862: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2863:
2864: if (newlinecheck)
2865: CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2866:
2867: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2868: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2869: #if defined COMPILE_PCRE8
2870: if (common->utf)
2871: {
2872: singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2873: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2874: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2875: JUMPHERE(singlechar);
2876: }
2877: #elif defined COMPILE_PCRE16
2878: if (common->utf)
2879: {
2880: singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2881: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2882: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2883: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2884: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2885: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2886: JUMPHERE(singlechar);
2887: }
2888: #endif /* COMPILE_PCRE[8|16] */
2889: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2890: JUMPHERE(start);
2891:
2892: if (newlinecheck)
2893: {
2894: JUMPHERE(end);
2895: JUMPHERE(nl);
2896: }
2897:
2898: return mainloop;
2899: }
2900:
2901: #define MAX_N_CHARS 3
2902:
2903: static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2904: {
2905: DEFINE_COMPILER;
2906: struct sljit_label *start;
2907: struct sljit_jump *quit;
2908: pcre_uint32 chars[MAX_N_CHARS * 2];
2909: pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2910: int location = 0;
2911: pcre_int32 len, c, bit, caseless;
2912: int must_stop;
2913:
2914: /* We do not support alternatives now. */
2915: if (*(common->start + GET(common->start, 1)) == OP_ALT)
2916: return FALSE;
2917:
2918: while (TRUE)
2919: {
2920: caseless = 0;
2921: must_stop = 1;
2922: switch(*cc)
2923: {
2924: case OP_CHAR:
2925: must_stop = 0;
2926: cc++;
2927: break;
2928:
2929: case OP_CHARI:
2930: caseless = 1;
2931: must_stop = 0;
2932: cc++;
2933: break;
2934:
2935: case OP_SOD:
2936: case OP_SOM:
2937: case OP_SET_SOM:
2938: case OP_NOT_WORD_BOUNDARY:
2939: case OP_WORD_BOUNDARY:
2940: case OP_EODN:
2941: case OP_EOD:
2942: case OP_CIRC:
2943: case OP_CIRCM:
2944: case OP_DOLL:
2945: case OP_DOLLM:
2946: /* Zero width assertions. */
2947: cc++;
2948: continue;
2949:
2950: case OP_PLUS:
2951: case OP_MINPLUS:
2952: case OP_POSPLUS:
2953: cc++;
2954: break;
2955:
2956: case OP_EXACT:
2957: cc += 1 + IMM2_SIZE;
2958: break;
2959:
2960: case OP_PLUSI:
2961: case OP_MINPLUSI:
2962: case OP_POSPLUSI:
2963: caseless = 1;
2964: cc++;
2965: break;
2966:
2967: case OP_EXACTI:
2968: caseless = 1;
2969: cc += 1 + IMM2_SIZE;
2970: break;
2971:
2972: default:
2973: must_stop = 2;
2974: break;
2975: }
2976:
2977: if (must_stop == 2)
2978: break;
2979:
2980: len = 1;
2981: #ifdef SUPPORT_UTF
2982: if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2983: #endif
2984:
2985: if (caseless && char_has_othercase(common, cc))
2986: {
2987: caseless = char_get_othercase_bit(common, cc);
2988: if (caseless == 0)
2989: return FALSE;
2990: #ifdef COMPILE_PCRE8
2991: caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2992: #else
2993: if ((caseless & 0x100) != 0)
2994: caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2995: else
2996: caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2997: #endif
2998: }
2999: else
3000: caseless = 0;
3001:
3002: while (len > 0 && location < MAX_N_CHARS * 2)
3003: {
3004: c = *cc;
3005: bit = 0;
3006: if (len == (caseless & 0xff))
3007: {
3008: bit = caseless >> 8;
3009: c |= bit;
3010: }
3011:
3012: chars[location] = c;
3013: chars[location + 1] = bit;
3014:
3015: len--;
3016: location += 2;
3017: cc++;
3018: }
3019:
3020: if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3021: break;
3022: }
3023:
3024: /* At least two characters are required. */
3025: if (location < 2 * 2)
3026: return FALSE;
3027:
3028: if (firstline)
3029: {
3030: SLJIT_ASSERT(common->first_line_end != 0);
3031: OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3032: OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3033: }
3034: else
3035: OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3036:
3037: start = LABEL();
3038: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3039:
3040: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3041: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3042: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3043: if (chars[1] != 0)
3044: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3045: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3046: if (location > 2 * 2)
3047: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3048: if (chars[3] != 0)
3049: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3050: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3051: if (location > 2 * 2)
3052: {
3053: if (chars[5] != 0)
3054: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3055: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3056: }
3057: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3058:
3059: JUMPHERE(quit);
3060:
3061: if (firstline)
3062: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3063: else
3064: OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3065: return TRUE;
3066: }
3067:
3068: #undef MAX_N_CHARS
3069:
3070: static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3071: {
3072: DEFINE_COMPILER;
3073: struct sljit_label *start;
3074: struct sljit_jump *quit;
3075: struct sljit_jump *found;
3076: pcre_uchar oc, bit;
3077:
3078: if (firstline)
3079: {
3080: SLJIT_ASSERT(common->first_line_end != 0);
3081: OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3082: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3083: }
3084:
3085: start = LABEL();
3086: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3087: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3088:
3089: oc = first_char;
3090: if (caseless)
3091: {
3092: oc = TABLE_GET(first_char, common->fcc, first_char);
3093: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3094: if (first_char > 127 && common->utf)
3095: oc = UCD_OTHERCASE(first_char);
3096: #endif
3097: }
3098: if (first_char == oc)
3099: found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3100: else
3101: {
3102: bit = first_char ^ oc;
3103: if (is_powerof2(bit))
3104: {
3105: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3106: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3107: }
3108: else
3109: {
3110: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3111: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3112: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3113: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3114: found = JUMP(SLJIT_C_NOT_ZERO);
3115: }
3116: }
3117:
3118: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119: JUMPTO(SLJIT_JUMP, start);
3120: JUMPHERE(found);
3121: JUMPHERE(quit);
3122:
3123: if (firstline)
3124: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3125: }
3126:
3127: static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3128: {
3129: DEFINE_COMPILER;
3130: struct sljit_label *loop;
3131: struct sljit_jump *lastchar;
3132: struct sljit_jump *firstchar;
3133: struct sljit_jump *quit;
3134: struct sljit_jump *foundcr = NULL;
3135: struct sljit_jump *notfoundnl;
3136: jump_list *newline = NULL;
3137:
3138: if (firstline)
3139: {
3140: SLJIT_ASSERT(common->first_line_end != 0);
3141: OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3142: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3143: }
3144:
3145: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3146: {
3147: lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3148: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3149: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3150: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3151: firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3152:
3153: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3154: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3155: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3156: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3157: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3158: #endif
3159: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3160:
3161: loop = LABEL();
3162: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3163: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3164: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3165: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3166: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3167: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3168:
3169: JUMPHERE(quit);
3170: JUMPHERE(firstchar);
3171: JUMPHERE(lastchar);
3172:
3173: if (firstline)
3174: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3175: return;
3176: }
3177:
3178: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3179: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3180: firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3181: skip_char_back(common);
3182:
3183: loop = LABEL();
3184: read_char(common);
3185: lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3186: if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3187: foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3188: check_newlinechar(common, common->nltype, &newline, FALSE);
3189: set_jumps(newline, loop);
3190:
3191: if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3192: {
3193: quit = JUMP(SLJIT_JUMP);
3194: JUMPHERE(foundcr);
3195: notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3196: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3197: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3198: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3199: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3200: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3201: #endif
3202: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3203: JUMPHERE(notfoundnl);
3204: JUMPHERE(quit);
3205: }
3206: JUMPHERE(lastchar);
3207: JUMPHERE(firstchar);
3208:
3209: if (firstline)
3210: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3211: }
3212:
3213: static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3214:
3215: static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3216: {
3217: DEFINE_COMPILER;
3218: struct sljit_label *start;
3219: struct sljit_jump *quit;
3220: struct sljit_jump *found = NULL;
3221: jump_list *matches = NULL;
3222: pcre_uint8 inverted_start_bits[32];
3223: int i;
3224: #ifndef COMPILE_PCRE8
3225: struct sljit_jump *jump;
3226: #endif
3227:
3228: for (i = 0; i < 32; ++i)
3229: inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3230:
3231: if (firstline)
3232: {
3233: SLJIT_ASSERT(common->first_line_end != 0);
3234: OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3235: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3236: }
3237:
3238: start = LABEL();
3239: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3240: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3241: #ifdef SUPPORT_UTF
3242: if (common->utf)
3243: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3244: #endif
3245:
3246: if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3247: {
3248: #ifndef COMPILE_PCRE8
3249: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3250: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3251: JUMPHERE(jump);
3252: #endif
3253: OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3254: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3255: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3256: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3257: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3258: found = JUMP(SLJIT_C_NOT_ZERO);
3259: }
3260:
3261: #ifdef SUPPORT_UTF
3262: if (common->utf)
3263: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3264: #endif
3265: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3266: #ifdef SUPPORT_UTF
3267: #if defined COMPILE_PCRE8
3268: if (common->utf)
3269: {
3270: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3271: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3272: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3273: }
3274: #elif defined COMPILE_PCRE16
3275: if (common->utf)
3276: {
3277: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3278: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3279: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3280: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3281: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3282: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3283: }
3284: #endif /* COMPILE_PCRE[8|16] */
3285: #endif /* SUPPORT_UTF */
3286: JUMPTO(SLJIT_JUMP, start);
3287: if (found != NULL)
3288: JUMPHERE(found);
3289: if (matches != NULL)
3290: set_jumps(matches, LABEL());
3291: JUMPHERE(quit);
3292:
3293: if (firstline)
3294: OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3295: }
3296:
3297: static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3298: {
3299: DEFINE_COMPILER;
3300: struct sljit_label *loop;
3301: struct sljit_jump *toolong;
3302: struct sljit_jump *alreadyfound;
3303: struct sljit_jump *found;
3304: struct sljit_jump *foundoc = NULL;
3305: struct sljit_jump *notfound;
3306: pcre_uint32 oc, bit;
3307:
3308: SLJIT_ASSERT(common->req_char_ptr != 0);
3309: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3310: OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3311: toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3312: alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3313:
3314: if (has_firstchar)
3315: OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3316: else
3317: OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3318:
3319: loop = LABEL();
3320: notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3321:
3322: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3323: oc = req_char;
3324: if (caseless)
3325: {
3326: oc = TABLE_GET(req_char, common->fcc, req_char);
3327: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3328: if (req_char > 127 && common->utf)
3329: oc = UCD_OTHERCASE(req_char);
3330: #endif
3331: }
3332: if (req_char == oc)
3333: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3334: else
3335: {
3336: bit = req_char ^ oc;
3337: if (is_powerof2(bit))
3338: {
3339: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3340: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3341: }
3342: else
3343: {
3344: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3345: foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3346: }
3347: }
3348: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3349: JUMPTO(SLJIT_JUMP, loop);
3350:
3351: JUMPHERE(found);
3352: if (foundoc)
3353: JUMPHERE(foundoc);
3354: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3355: JUMPHERE(alreadyfound);
3356: JUMPHERE(toolong);
3357: return notfound;
3358: }
3359:
3360: static void do_revertframes(compiler_common *common)
3361: {
3362: DEFINE_COMPILER;
3363: struct sljit_jump *jump;
3364: struct sljit_label *mainloop;
3365:
3366: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3367: OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3368: GET_LOCAL_BASE(TMP3, 0, 0);
3369:
3370: /* Drop frames until we reach STACK_TOP. */
3371: mainloop = LABEL();
3372: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3373: OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3374: jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3375:
3376: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3377: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3378: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3379: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3380: JUMPTO(SLJIT_JUMP, mainloop);
3381:
3382: JUMPHERE(jump);
3383: jump = JUMP(SLJIT_C_SIG_LESS);
3384: /* End of dropping frames. */
3385: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3386:
3387: JUMPHERE(jump);
3388: OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3389: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3390: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3391: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3392: JUMPTO(SLJIT_JUMP, mainloop);
3393: }
3394:
/* Emits the shared fast-call helper behind word-boundary tests.

The emitted code computes a 0/1 "is a word character" flag for the character
before STR_PTR (kept in the local slot LOCALS1) and for the character at
STR_PTR (kept in TMP2), then XORs the two with SLJIT_SET_E so that the caller
can branch on the flags: a non-zero result means the two sides differ.

A side that does not exist counts as "not a word character": LOCALS1 is
zeroed before the start-of-subject test, and TMP2 is zeroed before the
end-of-subject test.

The return address is kept in the local slot LOCALS0 rather than in a
register. TMP1 and TMP2 are clobbered. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The non-UCP paths below shift the ctypes byte right by 4 and keep bit 0,
which is only correct while ctype_word is 0x10. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character when STR_PTR is at (or before) "begin". */
skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* TMP2 defaults to 1 so that an underscore is accepted without a
  property lookup. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  /* Two unsigned range tests on the value left in TMP1 by the getucd call:
  within ucp_Ll..ucp_Lu, or within ucp_Nd..ucp_No. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Table-driven test. Characters above 255 skip the lookup, leaving the
  zero already stored in LOCALS1. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now classify the character at STR_PTR into TMP2. The jumps recorded by
check_str_end() are bound after the classification, so they bypass it with
TMP2 still zero. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* Flags = (word flag at STR_PTR) XOR (word flag before STR_PTR). */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
}
3503:
3504: /*
3505: range format:
3506:
3507: ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3508: ranges[1] = first bit (0 or 1)
ranges[2 .. length+1] = positions of the bit changes (each entry is the first character whose bit differs from that of the previous character)
3510: */
3511:
3512: static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3513: {
3514: DEFINE_COMPILER;
3515: struct sljit_jump *jump;
3516:
3517: if (ranges[0] < 0)
3518: return FALSE;
3519:
3520: switch(ranges[0])
3521: {
3522: case 1:
3523: if (readch)
3524: read_char(common);
3525: add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3526: return TRUE;
3527:
3528: case 2:
3529: if (readch)
3530: read_char(common);
3531: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3532: add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3533: return TRUE;
3534:
3535: case 4:
3536: if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3537: {
3538: if (readch)
3539: read_char(common);
3540: if (ranges[1] != 0)
3541: {
3542: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3543: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3544: }
3545: else
3546: {
3547: jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3548: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3549: JUMPHERE(jump);
3550: }
3551: return TRUE;
3552: }
3553: if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3554: {
3555: if (readch)
3556: read_char(common);
3557: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3558: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3559: add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3560: return TRUE;
3561: }
3562: return FALSE;
3563:
3564: default:
3565: return FALSE;
3566: }
3567: }
3568:
3569: static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3570: {
3571: int i, bit, length;
3572: const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3573:
3574: bit = ctypes[0] & flag;
3575: ranges[0] = -1;
3576: ranges[1] = bit != 0 ? 1 : 0;
3577: length = 0;
3578:
3579: for (i = 1; i < 256; i++)
3580: if ((ctypes[i] & flag) != bit)
3581: {
3582: if (length >= MAX_RANGE_SIZE)
3583: return;
3584: ranges[2 + length] = i;
3585: length++;
3586: bit ^= flag;
3587: }
3588:
3589: if (bit != 0)
3590: {
3591: if (length >= MAX_RANGE_SIZE)
3592: return;
3593: ranges[2 + length] = 256;
3594: length++;
3595: }
3596: ranges[0] = length;
3597: }
3598:
3599: static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3600: {
3601: int ranges[2 + MAX_RANGE_SIZE];
3602: pcre_uint8 bit, cbit, all;
3603: int i, byte, length = 0;
3604:
3605: bit = bits[0] & 0x1;
3606: ranges[1] = bit;
3607: /* Can be 0 or 255. */
3608: all = -bit;
3609:
3610: for (i = 0; i < 256; )
3611: {
3612: byte = i >> 3;
3613: if ((i & 0x7) == 0 && bits[byte] == all)
3614: i += 8;
3615: else
3616: {
3617: cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3618: if (cbit != bit)
3619: {
3620: if (length >= MAX_RANGE_SIZE)
3621: return FALSE;
3622: ranges[2 + length] = i;
3623: length++;
3624: bit = cbit;
3625: all = -cbit;
3626: }
3627: i++;
3628: }
3629: }
3630:
3631: if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3632: {
3633: if (length >= MAX_RANGE_SIZE)
3634: return FALSE;
3635: ranges[2 + length] = 256;
3636: length++;
3637: }
3638: ranges[0] = length;
3639:
3640: return check_ranges(common, ranges, backtracks, FALSE);
3641: }
3642:
/* Emits the fast-call helper that tests whether the character in TMP1 is one
of 0x0a..0x0d or 0x85, and additionally 0x2028 or 0x2029 in the 16/32-bit
libraries or when common->utf is set in the 8-bit library.

The 0/1 result is accumulated in TMP2 and the final OR also sets the flags
(SLJIT_SET_E). TMP1 is modified by the emitted code. */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that one unsigned compare covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* The rebased values of 0x2028 and 0x2029 differ only in bit 0, so
  setting that bit lets a single compare match both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the result of the last compare and leave the flags set. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3669:
/* Emits the fast-call helper that tests whether the character in TMP1 is a
horizontal space: 0x09, 0x20 or 0xa0, and additionally 0x1680, 0x180e,
0x2000..0x200A, 0x202f, 0x205f or 0x3000 in the 16/32-bit libraries or when
common->utf is set in the 8-bit library.

The 0/1 result is accumulated in TMP2 and the final OR also sets the flags
(SLJIT_SET_E). In the wide path TMP1 is rebased by 0x2000 and is therefore
modified. */
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 holds the result. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* Rebase to 0x2000: one unsigned compare covers 0x2000..0x200A, and the
  remaining constants are expressed relative to the same base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the result of the last compare and leave the flags set. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3708:
/* Emits the fast-call helper that tests whether the character in TMP1 is a
vertical space: 0x0a..0x0d or 0x85, and additionally 0x2028 or 0x2029 in the
16/32-bit libraries or when common->utf is set in the 8-bit library.

The 0/1 result is accumulated in TMP2 and the final OR also sets the flags
(SLJIT_SET_E). TMP1 is modified by the emitted code. */
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 holds the result. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase to 0x0a so that one unsigned compare covers 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* NOTE(review): SLJIT_SET_E looks redundant on this line. The flags are
  recomputed by the SUB two lines below before anything reads them, and
  check_anynewline() emits the same step without it. Confirm before removing. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* The rebased values of 0x2028 and 0x2029 differ only in bit 0, so
  setting that bit lets a single compare match both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the result of the last compare and leave the flags set. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3736:
3737: #define CHAR1 STR_END
3738: #define CHAR2 STACK_TOP
3739:
/* Emits the fast-call helper that compares two character sequences exactly.

Register use in the emitted code, as far as this function shows:
  TMP1     pointer into the first sequence
  STR_PTR  on entry TMP2 is subtracted from it; it then walks the second
           sequence
  TMP2     remaining length, counted down by IN_UCHARS(1) per character
  CHAR1    alias of STR_END, borrowed to hold a character of the first
           sequence; the original value is parked in TMP3
  CHAR2    alias of STACK_TOP, borrowed likewise; the original value is
           parked in the local slot LOCALS0

The loop ends on the first mismatch or when TMP2 reaches zero. Both borrowed
registers are restored before returning.
NOTE(review): how the caller learns the outcome (flags or TMP2) is not
visible here - confirm against the call sites. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because each load in the loop
addresses offset IN_UCHARS(1). NOTE(review): presumably MOVU_UCHAR also
advances its base register by that offset - confirm against the SLJIT docs. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

/* Common exit for "mismatch" and "length exhausted". */
JUMPHERE(jump);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3766:
3767: #define LCC_TABLE STACK_LIMIT
3768:
/* Emits the fast-call helper that compares two character sequences after
mapping each character through the table at common->lcc.

Same register scheme as do_casefulcmp(), with one more borrowed register:
  LCC_TABLE  alias of STACK_LIMIT, loaded with the address common->lcc; the
             original value is parked in TMP3
  CHAR1      alias of STR_END, original value parked in local slot LOCALS0
  CHAR2      alias of STACK_TOP, original value parked in local slot LOCALS1

In the 16/32-bit libraries characters above 255 bypass the table lookup and
are compared as they are. All borrowed registers are restored before
returning.
NOTE(review): how the caller learns the outcome (flags or TMP2) is not
visible here - confirm against the call sites. */
static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one character back because each load in the loop
addresses offset IN_UCHARS(1). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map CHAR1 through the table (skipped for values above 255). */
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
/* Map CHAR2 through the table (skipped for values above 255). */
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

/* Common exit for "mismatch" and "length exhausted". */
JUMPHERE(jump);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3811:
3812: #undef LCC_TABLE
3813: #undef CHAR1
3814: #undef CHAR2
3815:
3816: #if defined SUPPORT_UTF && defined SUPPORT_UCP
3817:
3818: static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3819: {
3820: /* This function would be ineffective to do in JIT level. */
3821: pcre_uint32 c1, c2;
3822: const pcre_uchar *src2 = args->uchar_ptr;
3823: const pcre_uchar *end2 = args->end;
3824: const ucd_record *ur;
3825: const pcre_uint32 *pp;
3826:
3827: while (src1 < end1)
3828: {
3829: if (src2 >= end2)
3830: return (pcre_uchar*)1;
3831: GETCHARINC(c1, src1);
3832: GETCHARINC(c2, src2);
3833: ur = GET_UCD(c2);
3834: if (c1 != c2 && c1 != c2 + ur->other_case)
3835: {
3836: pp = PRIV(ucd_caseless_sets) + ur->caseset;
3837: for (;;)
3838: {
3839: if (c1 < *pp) return NULL;
3840: if (c1 == *pp++) break;
3841: }
3842: }
3843: }
3844: return src2;
3845: }
3846:
3847: #endif /* SUPPORT_UTF && SUPPORT_UCP */
3848:
3849: static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3850: compare_context* context, jump_list **backtracks)
3851: {
3852: DEFINE_COMPILER;
3853: unsigned int othercasebit = 0;
3854: pcre_uchar *othercasechar = NULL;
3855: #ifdef SUPPORT_UTF
3856: int utflength;
3857: #endif
3858:
3859: if (caseless && char_has_othercase(common, cc))
3860: {
3861: othercasebit = char_get_othercase_bit(common, cc);
3862: SLJIT_ASSERT(othercasebit);
3863: /* Extracting bit difference info. */
3864: #if defined COMPILE_PCRE8
3865: othercasechar = cc + (othercasebit >> 8);
3866: othercasebit &= 0xff;
3867: #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3868: /* Note that this code only handles characters in the BMP. If there
3869: ever are characters outside the BMP whose othercase differs in only one
3870: bit from itself (there currently are none), this code will need to be
3871: revised for COMPILE_PCRE32. */
3872: othercasechar = cc + (othercasebit >> 9);
3873: if ((othercasebit & 0x100) != 0)
3874: othercasebit = (othercasebit & 0xff) << 8;
3875: else
3876: othercasebit &= 0xff;
3877: #endif /* COMPILE_PCRE[8|16|32] */
3878: }
3879:
3880: if (context->sourcereg == -1)
3881: {
3882: #if defined COMPILE_PCRE8
3883: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3884: if (context->length >= 4)
3885: OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3886: else if (context->length >= 2)
3887: OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3888: else
3889: #endif
3890: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3891: #elif defined COMPILE_PCRE16
3892: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3893: if (context->length >= 4)
3894: OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3895: else
3896: #endif
3897: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3898: #elif defined COMPILE_PCRE32
3899: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3900: #endif /* COMPILE_PCRE[8|16|32] */
3901: context->sourcereg = TMP2;
3902: }
3903:
3904: #ifdef SUPPORT_UTF
3905: utflength = 1;
3906: if (common->utf && HAS_EXTRALEN(*cc))
3907: utflength += GET_EXTRALEN(*cc);
3908:
3909: do
3910: {
3911: #endif
3912:
3913: context->length -= IN_UCHARS(1);
3914: #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3915:
3916: /* Unaligned read is supported. */
3917: if (othercasebit != 0 && othercasechar == cc)
3918: {
3919: context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3920: context->oc.asuchars[context->ucharptr] = othercasebit;
3921: }
3922: else
3923: {
3924: context->c.asuchars[context->ucharptr] = *cc;
3925: context->oc.asuchars[context->ucharptr] = 0;
3926: }
3927: context->ucharptr++;
3928:
3929: #if defined COMPILE_PCRE8
3930: if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3931: #else
3932: if (context->ucharptr >= 2 || context->length == 0)
3933: #endif
3934: {
3935: if (context->length >= 4)
3936: OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3937: else if (context->length >= 2)
3938: OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3939: #if defined COMPILE_PCRE8
3940: else if (context->length >= 1)
3941: OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3942: #endif /* COMPILE_PCRE8 */
3943: context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3944:
3945: switch(context->ucharptr)
3946: {
3947: case 4 / sizeof(pcre_uchar):
3948: if (context->oc.asint != 0)
3949: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3950: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3951: break;
3952:
3953: case 2 / sizeof(pcre_uchar):
3954: if (context->oc.asushort != 0)
3955: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3956: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3957: break;
3958:
3959: #ifdef COMPILE_PCRE8
3960: case 1:
3961: if (context->oc.asbyte != 0)
3962: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3963: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3964: break;
3965: #endif
3966:
3967: default:
3968: SLJIT_ASSERT_STOP();
3969: break;
3970: }
3971: context->ucharptr = 0;
3972: }
3973:
3974: #else
3975:
3976: /* Unaligned read is unsupported or in 32 bit mode. */
3977: if (context->length >= 1)
3978: OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3979:
3980: context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3981:
3982: if (othercasebit != 0 && othercasechar == cc)
3983: {
3984: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3985: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3986: }
3987: else
3988: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3989:
3990: #endif
3991:
3992: cc++;
3993: #ifdef SUPPORT_UTF
3994: utflength--;
3995: }
3996: while (utflength > 0);
3997: #endif
3998:
3999: return cc;
4000: }
4001:
4002: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4003:
/* Helper macros for compile_xclass_matchingpath(). The value held in typereg
(resp. TMP1) is kept biased by a compile-time offset, tracked in the local
variable typeoffset (resp. charoffset), so that a range test can be emitted as
a single unsigned comparison. Each macro emits the SUB/ADD needed to move the
register from the current bias to the requested one (nothing is emitted when
the bias is unchanged) and then records the new bias.

The bodies are wrapped in do { } while (0) so that each macro expands to
exactly one statement and stays correct inside an unbraced if/else. Note that
(value) is evaluated more than once: pass side-effect free expressions only. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4023:
4024: static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4025: {
4026: DEFINE_COMPILER;
4027: jump_list *found = NULL;
4028: jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4029: pcre_int32 c, charoffset;
4030: const pcre_uint32 *other_cases;
4031: struct sljit_jump *jump = NULL;
4032: pcre_uchar *ccbegin;
4033: int compares, invertcmp, numberofcmps;
4034: #ifdef SUPPORT_UCP
4035: BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4036: BOOL charsaved = FALSE;
4037: int typereg = TMP1, scriptreg = TMP1;
4038: pcre_int32 typeoffset;
4039: #endif
4040:
4041: /* Although SUPPORT_UTF must be defined, we are
4042: not necessary in utf mode even in 8 bit mode. */
4043: detect_partial_match(common, backtracks);
4044: read_char(common);
4045:
4046: if ((*cc++ & XCL_MAP) != 0)
4047: {
4048: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4049: #ifndef COMPILE_PCRE8
4050: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4051: #elif defined SUPPORT_UTF
4052: if (common->utf)
4053: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4054: #endif
4055:
4056: if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4057: {
4058: OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4059: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4060: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4061: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4062: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4063: add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4064: }
4065:
4066: #ifndef COMPILE_PCRE8
4067: JUMPHERE(jump);
4068: #elif defined SUPPORT_UTF
4069: if (common->utf)
4070: JUMPHERE(jump);
4071: #endif
4072: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4073: #ifdef SUPPORT_UCP
4074: charsaved = TRUE;
4075: #endif
4076: cc += 32 / sizeof(pcre_uchar);
4077: }
4078:
4079: /* Scanning the necessary info. */
4080: ccbegin = cc;
4081: compares = 0;
4082: while (*cc != XCL_END)
4083: {
4084: compares++;
4085: if (*cc == XCL_SINGLE)
4086: {
4087: cc += 2;
4088: #ifdef SUPPORT_UTF
4089: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4090: #endif
4091: #ifdef SUPPORT_UCP
4092: needschar = TRUE;
4093: #endif
4094: }
4095: else if (*cc == XCL_RANGE)
4096: {
4097: cc += 2;
4098: #ifdef SUPPORT_UTF
4099: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4100: #endif
4101: cc++;
4102: #ifdef SUPPORT_UTF
4103: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4104: #endif
4105: #ifdef SUPPORT_UCP
4106: needschar = TRUE;
4107: #endif
4108: }
4109: #ifdef SUPPORT_UCP
4110: else
4111: {
4112: SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4113: cc++;
4114: switch(*cc)
4115: {
4116: case PT_ANY:
4117: break;
4118:
4119: case PT_LAMP:
4120: case PT_GC:
4121: case PT_PC:
4122: case PT_ALNUM:
4123: needstype = TRUE;
4124: break;
4125:
4126: case PT_SC:
4127: needsscript = TRUE;
4128: break;
4129:
4130: case PT_SPACE:
4131: case PT_PXSPACE:
4132: case PT_WORD:
4133: needstype = TRUE;
4134: needschar = TRUE;
4135: break;
4136:
4137: case PT_CLIST:
4138: case PT_UCNC:
4139: needschar = TRUE;
4140: break;
4141:
4142: default:
4143: SLJIT_ASSERT_STOP();
4144: break;
4145: }
4146: cc += 2;
4147: }
4148: #endif
4149: }
4150:
4151: #ifdef SUPPORT_UCP
4152: /* Simple register allocation. TMP1 is preferred if possible. */
4153: if (needstype || needsscript)
4154: {
4155: if (needschar && !charsaved)
4156: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4157: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4158: if (needschar)
4159: {
4160: if (needstype)
4161: {
4162: OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4163: typereg = RETURN_ADDR;
4164: }
4165:
4166: if (needsscript)
4167: scriptreg = TMP3;
4168: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4169: }
4170: else if (needstype && needsscript)
4171: scriptreg = TMP3;
4172: /* In all other cases only one of them was specified, and that can goes to TMP1. */
4173:
4174: if (needsscript)
4175: {
4176: if (scriptreg == TMP1)
4177: {
4178: OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4179: OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4180: }
4181: else
4182: {
4183: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4184: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4185: OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4186: }
4187: }
4188: }
4189: #endif
4190:
4191: /* Generating code. */
4192: cc = ccbegin;
4193: charoffset = 0;
4194: numberofcmps = 0;
4195: #ifdef SUPPORT_UCP
4196: typeoffset = 0;
4197: #endif
4198:
4199: while (*cc != XCL_END)
4200: {
4201: compares--;
4202: invertcmp = (compares == 0 && list != backtracks);
4203: jump = NULL;
4204:
4205: if (*cc == XCL_SINGLE)
4206: {
4207: cc ++;
4208: #ifdef SUPPORT_UTF
4209: if (common->utf)
4210: {
4211: GETCHARINC(c, cc);
4212: }
4213: else
4214: #endif
4215: c = *cc++;
4216:
4217: if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4218: {
4219: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4220: OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4221: numberofcmps++;
4222: }
4223: else if (numberofcmps > 0)
4224: {
4225: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4226: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4227: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4228: numberofcmps = 0;
4229: }
4230: else
4231: {
4232: jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4233: numberofcmps = 0;
4234: }
4235: }
4236: else if (*cc == XCL_RANGE)
4237: {
4238: cc ++;
4239: #ifdef SUPPORT_UTF
4240: if (common->utf)
4241: {
4242: GETCHARINC(c, cc);
4243: }
4244: else
4245: #endif
4246: c = *cc++;
4247: SET_CHAR_OFFSET(c);
4248: #ifdef SUPPORT_UTF
4249: if (common->utf)
4250: {
4251: GETCHARINC(c, cc);
4252: }
4253: else
4254: #endif
4255: c = *cc++;
4256: if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4257: {
4258: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4259: OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4260: numberofcmps++;
4261: }
4262: else if (numberofcmps > 0)
4263: {
4264: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4265: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4266: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4267: numberofcmps = 0;
4268: }
4269: else
4270: {
4271: jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4272: numberofcmps = 0;
4273: }
4274: }
4275: #ifdef SUPPORT_UCP
4276: else
4277: {
4278: if (*cc == XCL_NOTPROP)
4279: invertcmp ^= 0x1;
4280: cc++;
4281: switch(*cc)
4282: {
4283: case PT_ANY:
4284: if (list != backtracks)
4285: {
4286: if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4287: continue;
4288: }
4289: else if (cc[-1] == XCL_NOTPROP)
4290: continue;
4291: jump = JUMP(SLJIT_JUMP);
4292: break;
4293:
4294: case PT_LAMP:
4295: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4296: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4297: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4298: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4299: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4300: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4301: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4302: break;
4303:
4304: case PT_GC:
4305: c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4306: SET_TYPE_OFFSET(c);
4307: jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4308: break;
4309:
4310: case PT_PC:
4311: jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4312: break;
4313:
4314: case PT_SC:
4315: jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4316: break;
4317:
4318: case PT_SPACE:
4319: case PT_PXSPACE:
4320: if (*cc == PT_SPACE)
4321: {
4322: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4323: jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4324: }
4325: SET_CHAR_OFFSET(9);
4326: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4327: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4328: if (*cc == PT_SPACE)
4329: JUMPHERE(jump);
4330:
4331: SET_TYPE_OFFSET(ucp_Zl);
4332: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4333: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4334: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4335: break;
4336:
4337: case PT_WORD:
4338: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4339: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4340: /* Fall through. */
4341:
4342: case PT_ALNUM:
4343: SET_TYPE_OFFSET(ucp_Ll);
4344: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4345: OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4346: SET_TYPE_OFFSET(ucp_Nd);
4347: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4348: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4349: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4350: break;
4351:
4352: case PT_CLIST:
4353: other_cases = PRIV(ucd_caseless_sets) + cc[1];
4354:
4355: /* At least three characters are required.
4356: Otherwise this case would be handled by the normal code path. */
4357: SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4358: SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4359:
4360: /* Optimizing character pairs, if their difference is power of 2. */
4361: if (is_powerof2(other_cases[1] ^ other_cases[0]))
4362: {
4363: if (charoffset == 0)
4364: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4365: else
4366: {
4367: OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4368: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4369: }
4370: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4371: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4372: other_cases += 2;
4373: }
4374: else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4375: {
4376: if (charoffset == 0)
4377: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4378: else
4379: {
4380: OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4381: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4382: }
4383: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4384: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4385:
4386: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4387: OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4388:
4389: other_cases += 3;
4390: }
4391: else
4392: {
4393: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4394: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4395: }
4396:
4397: while (*other_cases != NOTACHAR)
4398: {
4399: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4400: OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4401: }
4402: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4403: break;
4404:
4405: case PT_UCNC:
4406: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4407: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4408: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4409: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4410: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4411: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4412:
4413: SET_CHAR_OFFSET(0xa0);
4414: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4415: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4416: SET_CHAR_OFFSET(0);
4417: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4418: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4419: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4420: break;
4421: }
4422: cc += 2;
4423: }
4424: #endif
4425:
4426: if (jump != NULL)
4427: add_jump(compiler, compares > 0 ? list : backtracks, jump);
4428: }
4429:
4430: if (found != NULL)
4431: set_jumps(found, LABEL());
4432: }
4433:
4434: #undef SET_TYPE_OFFSET
4435: #undef SET_CHAR_OFFSET
4436:
4437: #endif
4438:
4439: static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4440: {
4441: DEFINE_COMPILER;
4442: int length;
4443: unsigned int c, oc, bit;
4444: compare_context context;
4445: struct sljit_jump *jump[4];
4446: jump_list *end_list;
4447: #ifdef SUPPORT_UTF
4448: struct sljit_label *label;
4449: #ifdef SUPPORT_UCP
4450: pcre_uchar propdata[5];
4451: #endif
4452: #endif
4453:
4454: switch(type)
4455: {
4456: case OP_SOD:
4457: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4458: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4459: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4460: return cc;
4461:
4462: case OP_SOM:
4463: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4464: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4465: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4466: return cc;
4467:
4468: case OP_NOT_WORD_BOUNDARY:
4469: case OP_WORD_BOUNDARY:
4470: add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4471: add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4472: return cc;
4473:
4474: case OP_NOT_DIGIT:
4475: case OP_DIGIT:
4476: /* Digits are usually 0-9, so it is worth to optimize them. */
4477: if (common->digits[0] == -2)
4478: get_ctype_ranges(common, ctype_digit, common->digits);
4479: detect_partial_match(common, backtracks);
4480: /* Flip the starting bit in the negative case. */
4481: if (type == OP_NOT_DIGIT)
4482: common->digits[1] ^= 1;
4483: if (!check_ranges(common, common->digits, backtracks, TRUE))
4484: {
4485: read_char8_type(common);
4486: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4487: add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4488: }
4489: if (type == OP_NOT_DIGIT)
4490: common->digits[1] ^= 1;
4491: return cc;
4492:
4493: case OP_NOT_WHITESPACE:
4494: case OP_WHITESPACE:
4495: detect_partial_match(common, backtracks);
4496: read_char8_type(common);
4497: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4498: add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4499: return cc;
4500:
4501: case OP_NOT_WORDCHAR:
4502: case OP_WORDCHAR:
4503: detect_partial_match(common, backtracks);
4504: read_char8_type(common);
4505: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4506: add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4507: return cc;
4508:
4509: case OP_ANY:
4510: detect_partial_match(common, backtracks);
4511: read_char(common);
4512: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4513: {
4514: jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4515: end_list = NULL;
4516: if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4517: add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4518: else
4519: check_str_end(common, &end_list);
4520:
4521: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4522: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4523: set_jumps(end_list, LABEL());
4524: JUMPHERE(jump[0]);
4525: }
4526: else
4527: check_newlinechar(common, common->nltype, backtracks, TRUE);
4528: return cc;
4529:
4530: case OP_ALLANY:
4531: detect_partial_match(common, backtracks);
4532: #ifdef SUPPORT_UTF
4533: if (common->utf)
4534: {
4535: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4536: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4537: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4538: #if defined COMPILE_PCRE8
4539: jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4540: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4541: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4542: #elif defined COMPILE_PCRE16
4543: jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4544: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4545: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4546: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4547: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4548: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4549: #endif
4550: JUMPHERE(jump[0]);
4551: #endif /* COMPILE_PCRE[8|16] */
4552: return cc;
4553: }
4554: #endif
4555: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4556: return cc;
4557:
4558: case OP_ANYBYTE:
4559: detect_partial_match(common, backtracks);
4560: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4561: return cc;
4562:
4563: #ifdef SUPPORT_UTF
4564: #ifdef SUPPORT_UCP
4565: case OP_NOTPROP:
4566: case OP_PROP:
4567: propdata[0] = 0;
4568: propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4569: propdata[2] = cc[0];
4570: propdata[3] = cc[1];
4571: propdata[4] = XCL_END;
4572: compile_xclass_matchingpath(common, propdata, backtracks);
4573: return cc + 2;
4574: #endif
4575: #endif
4576:
4577: case OP_ANYNL:
4578: detect_partial_match(common, backtracks);
4579: read_char(common);
4580: jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4581: /* We don't need to handle soft partial matching case. */
4582: end_list = NULL;
4583: if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4584: add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4585: else
4586: check_str_end(common, &end_list);
4587: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4588: jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4589: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4590: jump[2] = JUMP(SLJIT_JUMP);
4591: JUMPHERE(jump[0]);
4592: check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4593: set_jumps(end_list, LABEL());
4594: JUMPHERE(jump[1]);
4595: JUMPHERE(jump[2]);
4596: return cc;
4597:
4598: case OP_NOT_HSPACE:
4599: case OP_HSPACE:
4600: detect_partial_match(common, backtracks);
4601: read_char(common);
4602: add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4603: add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4604: return cc;
4605:
4606: case OP_NOT_VSPACE:
4607: case OP_VSPACE:
4608: detect_partial_match(common, backtracks);
4609: read_char(common);
4610: add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4611: add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4612: return cc;
4613:
4614: #ifdef SUPPORT_UCP
4615: case OP_EXTUNI:
4616: detect_partial_match(common, backtracks);
4617: read_char(common);
4618: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4619: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4620: /* Optimize register allocation: use a real register. */
4621: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4622: OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4623:
4624: label = LABEL();
4625: jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4626: OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4627: read_char(common);
4628: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4629: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4630: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4631:
4632: OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4633: OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4634: OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4635: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4636: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4637: JUMPTO(SLJIT_C_NOT_ZERO, label);
4638:
4639: OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4640: JUMPHERE(jump[0]);
4641: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4642:
4643: if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4644: {
4645: jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4646: /* Since we successfully read a char above, partial matching must occure. */
4647: check_partial(common, TRUE);
4648: JUMPHERE(jump[0]);
4649: }
4650: return cc;
4651: #endif
4652:
4653: case OP_EODN:
4654: /* Requires rather complex checks. */
4655: jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4656: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4657: {
4658: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4659: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4660: if (common->mode == JIT_COMPILE)
4661: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4662: else
4663: {
4664: jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4665: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4666: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4667: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4668: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4669: add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4670: check_partial(common, TRUE);
4671: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4672: JUMPHERE(jump[1]);
4673: }
4674: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4675: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4676: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4677: }
4678: else if (common->nltype == NLTYPE_FIXED)
4679: {
4680: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4681: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4682: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4683: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4684: }
4685: else
4686: {
4687: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4688: jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4689: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4690: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4691: jump[2] = JUMP(SLJIT_C_GREATER);
4692: add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4693: /* Equal. */
4694: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4695: jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4696: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4697:
4698: JUMPHERE(jump[1]);
4699: if (common->nltype == NLTYPE_ANYCRLF)
4700: {
4701: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4702: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4703: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4704: }
4705: else
4706: {
4707: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4708: read_char(common);
4709: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4710: add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4711: add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4712: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4713: }
4714: JUMPHERE(jump[2]);
4715: JUMPHERE(jump[3]);
4716: }
4717: JUMPHERE(jump[0]);
4718: check_partial(common, FALSE);
4719: return cc;
4720:
4721: case OP_EOD:
4722: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4723: check_partial(common, FALSE);
4724: return cc;
4725:
4726: case OP_CIRC:
4727: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4728: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4729: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4730: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4731: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4732: return cc;
4733:
4734: case OP_CIRCM:
4735: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4736: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4737: jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4738: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4739: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4740: jump[0] = JUMP(SLJIT_JUMP);
4741: JUMPHERE(jump[1]);
4742:
4743: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4744: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4745: {
4746: OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4747: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4748: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4749: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4750: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4751: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4752: }
4753: else
4754: {
4755: skip_char_back(common);
4756: read_char(common);
4757: check_newlinechar(common, common->nltype, backtracks, FALSE);
4758: }
4759: JUMPHERE(jump[0]);
4760: return cc;
4761:
4762: case OP_DOLL:
4763: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4764: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4765: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4766:
4767: if (!common->endonly)
4768: compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4769: else
4770: {
4771: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4772: check_partial(common, FALSE);
4773: }
4774: return cc;
4775:
4776: case OP_DOLLM:
4777: jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4778: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4779: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4780: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4781: check_partial(common, FALSE);
4782: jump[0] = JUMP(SLJIT_JUMP);
4783: JUMPHERE(jump[1]);
4784:
4785: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4786: {
4787: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4788: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4789: if (common->mode == JIT_COMPILE)
4790: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4791: else
4792: {
4793: jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4794: /* STR_PTR = STR_END - IN_UCHARS(1) */
4795: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4796: check_partial(common, TRUE);
4797: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4798: JUMPHERE(jump[1]);
4799: }
4800:
4801: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4802: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4803: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4804: }
4805: else
4806: {
4807: peek_char(common);
4808: check_newlinechar(common, common->nltype, backtracks, FALSE);
4809: }
4810: JUMPHERE(jump[0]);
4811: return cc;
4812:
4813: case OP_CHAR:
4814: case OP_CHARI:
4815: length = 1;
4816: #ifdef SUPPORT_UTF
4817: if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4818: #endif
4819: if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4820: {
4821: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4822: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4823:
4824: context.length = IN_UCHARS(length);
4825: context.sourcereg = -1;
4826: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4827: context.ucharptr = 0;
4828: #endif
4829: return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4830: }
4831: detect_partial_match(common, backtracks);
4832: read_char(common);
4833: #ifdef SUPPORT_UTF
4834: if (common->utf)
4835: {
4836: GETCHAR(c, cc);
4837: }
4838: else
4839: #endif
4840: c = *cc;
4841: if (type == OP_CHAR || !char_has_othercase(common, cc))
4842: {
4843: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4844: return cc + length;
4845: }
4846: oc = char_othercase(common, c);
4847: bit = c ^ oc;
4848: if (is_powerof2(bit))
4849: {
4850: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4851: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4852: return cc + length;
4853: }
4854: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4855: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4856: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4857: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4858: add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4859: return cc + length;
4860:
4861: case OP_NOT:
4862: case OP_NOTI:
4863: detect_partial_match(common, backtracks);
4864: length = 1;
4865: #ifdef SUPPORT_UTF
4866: if (common->utf)
4867: {
4868: #ifdef COMPILE_PCRE8
4869: c = *cc;
4870: if (c < 128)
4871: {
4872: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4873: if (type == OP_NOT || !char_has_othercase(common, cc))
4874: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4875: else
4876: {
4877: /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4878: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4879: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4880: }
4881: /* Skip the variable-length character. */
4882: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4883: jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4884: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4885: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4886: JUMPHERE(jump[0]);
4887: return cc + 1;
4888: }
4889: else
4890: #endif /* COMPILE_PCRE8 */
4891: {
4892: GETCHARLEN(c, cc, length);
4893: read_char(common);
4894: }
4895: }
4896: else
4897: #endif /* SUPPORT_UTF */
4898: {
4899: read_char(common);
4900: c = *cc;
4901: }
4902:
4903: if (type == OP_NOT || !char_has_othercase(common, cc))
4904: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4905: else
4906: {
4907: oc = char_othercase(common, c);
4908: bit = c ^ oc;
4909: if (is_powerof2(bit))
4910: {
4911: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4912: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4913: }
4914: else
4915: {
4916: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4917: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4918: }
4919: }
4920: return cc + length;
4921:
4922: case OP_CLASS:
4923: case OP_NCLASS:
4924: detect_partial_match(common, backtracks);
4925: read_char(common);
4926: if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4927: return cc + 32 / sizeof(pcre_uchar);
4928:
4929: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4930: jump[0] = NULL;
4931: #ifdef COMPILE_PCRE8
4932: /* This check only affects 8 bit mode. In other modes, we
4933: always need to compare the value with 255. */
4934: if (common->utf)
4935: #endif /* COMPILE_PCRE8 */
4936: {
4937: jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4938: if (type == OP_CLASS)
4939: {
4940: add_jump(compiler, backtracks, jump[0]);
4941: jump[0] = NULL;
4942: }
4943: }
4944: #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4945: OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4946: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4947: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4948: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4949: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4950: add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4951: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4952: if (jump[0] != NULL)
4953: JUMPHERE(jump[0]);
4954: #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4955: return cc + 32 / sizeof(pcre_uchar);
4956:
4957: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4958: case OP_XCLASS:
4959: compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4960: return cc + GET(cc, 0) - 1;
4961: #endif
4962:
4963: case OP_REVERSE:
4964: length = GET(cc, 0);
4965: if (length == 0)
4966: return cc + LINK_SIZE;
4967: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4968: #ifdef SUPPORT_UTF
4969: if (common->utf)
4970: {
4971: OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4972: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4973: label = LABEL();
4974: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4975: skip_char_back(common);
4976: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4977: JUMPTO(SLJIT_C_NOT_ZERO, label);
4978: }
4979: else
4980: #endif
4981: {
4982: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4983: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4984: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4985: }
4986: check_start_used_ptr(common);
4987: return cc + LINK_SIZE;
4988: }
4989: SLJIT_ASSERT_STOP();
4990: return cc;
4991: }
4992:
4993: static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4994: {
4995: /* This function consumes at least one input character. */
4996: /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4997: DEFINE_COMPILER;
4998: pcre_uchar *ccbegin = cc;
4999: compare_context context;
5000: int size;
5001:
5002: context.length = 0;
5003: do
5004: {
5005: if (cc >= ccend)
5006: break;
5007:
5008: if (*cc == OP_CHAR)
5009: {
5010: size = 1;
5011: #ifdef SUPPORT_UTF
5012: if (common->utf && HAS_EXTRALEN(cc[1]))
5013: size += GET_EXTRALEN(cc[1]);
5014: #endif
5015: }
5016: else if (*cc == OP_CHARI)
5017: {
5018: size = 1;
5019: #ifdef SUPPORT_UTF
5020: if (common->utf)
5021: {
5022: if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5023: size = 0;
5024: else if (HAS_EXTRALEN(cc[1]))
5025: size += GET_EXTRALEN(cc[1]);
5026: }
5027: else
5028: #endif
5029: if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5030: size = 0;
5031: }
5032: else
5033: size = 0;
5034:
5035: cc += 1 + size;
5036: context.length += IN_UCHARS(size);
5037: }
5038: while (size > 0 && context.length <= 128);
5039:
5040: cc = ccbegin;
5041: if (context.length > 0)
5042: {
5043: /* We have a fixed-length byte sequence. */
5044: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5045: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5046:
5047: context.sourcereg = -1;
5048: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5049: context.ucharptr = 0;
5050: #endif
5051: do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5052: return cc;
5053: }
5054:
5055: /* A non-fixed length character will be checked if length == 0. */
5056: return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5057: }
5058:
5059: static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5060: {
5061: DEFINE_COMPILER;
5062: int offset = GET2(cc, 1) << 1;
5063:
5064: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5065: if (!common->jscript_compat)
5066: {
5067: if (backtracks == NULL)
5068: {
5069: /* OVECTOR(1) contains the "string begin - 1" constant. */
5070: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5071: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5072: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5073: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5074: return JUMP(SLJIT_C_NOT_ZERO);
5075: }
5076: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5077: }
5078: return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5079: }
5080:
/* Forward declarations of functions that are called below before their definitions appear. */
5082: static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5083: static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5084:
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
zero-fills it, stores `ccstart` in its cc field and makes it the new top of
parent's list (the previous top is kept in the prev field).

The macro expects `compiler`, `parent` and `backtrack` to be in scope at the
point of use. If the compiler is in an error state after the allocation, the
enclosing function returns `error`. Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5097:
/* Variant of the backtrack-record push for functions returning void: allocates
`size` bytes with sljit_alloc_memory(), zero-fills the record, stores `ccstart`
in its cc field and makes it the new top of parent's list (the previous top is
kept in the prev field).

The macro expects `compiler`, `parent` and `backtrack` to be in scope at the
point of use. If the compiler is in an error state after the allocation, the
enclosing function returns without a value. Note that `size` is evaluated
twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5110:
/* Reinterprets the in-scope `backtrack` pointer as a pointer to `type`, giving
access to the fields of a specialised backtrack record. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5112:
/* Emits the matching path for the back reference opcode at cc (the code
distinguishes OP_REF from OP_REFI). The start of the referenced group is read
from OVECTOR(offset) and its end from OVECTOR(offset + 1), where offset is
twice the 2-byte value stored after the opcode. On success the generated code
leaves STR_PTR after the text that matched the reference; failure jumps are
added to backtracks.

  withchecks  when TRUE, also emit the comparison against OVECTOR(1) (skipped
              when jscript_compat is set) and a test for an empty captured
              string
  emptyfail   only has an effect together with withchecks: when TRUE the
              empty-string jump is added to backtracks, otherwise it lands
              right after the comparison code

Returns the address that follows the opcode and its 2-byte argument. */
static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
int offset = GET2(cc, 1) << 1;
/* Taken when the captured string is empty; only created if withchecks is set. */
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured string. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (withchecks && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && *cc == OP_REFI)
  {
  /* Caseless comparison in UTF mode is delegated to do_utf_caselesscmp(). The
  assertion documents the register assignment the call sequence relies on. */
  SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
  /* TMP2 = end of the captured string. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
  if (withchecks)
    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);

  /* Needed to save important temporary registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
  /* The current subject position is handed over through arguments->uchar_ptr. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
  /* Return values 0 and 1 are both treated as failure. Outside JIT_COMPILE
  mode the value 1 additionally runs check_partial() before backtracking. Any
  other value becomes the new STR_PTR. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
    nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = end - start of the captured string; the zero flag is set when the
  captured string is empty. */
  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
  if (withchecks)
    jump = JUMP(SLJIT_C_ZERO);

  /* Advance STR_PTR by the captured length; "partial" is taken when that
  position lies beyond the end of the subject. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
  /* In JIT_COMPILE mode running past the end is an ordinary failure. */
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  /* Call the shared caseful / caseless compare routine; a non-zero TMP2 after
  the call is treated as a mismatch. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    /* The full comparison succeeded: skip the partial handling below. */
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing left to compare: go straight to check_partial(). */
    partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    /* Compare only the part that fits into the remaining subject. */
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    /* This path always ends in a backtrack after check_partial(). */
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-string jump: fail, or continue after the comparison. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
return cc + 1 + IMM2_SIZE;
}
5194:
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE). An iterator_backtrack record is pushed
onto parent, the repeat limits are decoded from the pattern and then either the
greedy or the minimizing loop is generated around compile_ref_matchingpath().

Returns the address that follows the repeat opcode and its arguments, or NULL
when the backtrack record cannot be allocated. */
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar type;
struct sljit_label *label;
/* Jump returned by compile_ref_checks(); it lands after the loop. */
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin = cc;
/* max == 0 is used below as "no upper limit". */
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);

/* The repeat opcode follows the reference opcode and its 2-byte argument. */
type = cc[1 + IMM2_SIZE];
/* The code treats repeat opcodes with the lowest bit set as minimizing. */
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  /* Two 2-byte values follow the repeat opcode: min, then max. */
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

if (!minimize)
  {
  /* Greedy repeat. */
  if (min == 0)
    {
    /* Two stack slots: the current STR_PTR and a zero. */
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* With a NULL list both checks are folded into the returned jump. */
    zerolength = compile_ref_checks(common, ccbegin, NULL);
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    /* At least one iteration is required: the OVECTOR(1) check emitted by
    compile_ref_checks() fails straight into the backtrack list. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
    }

  /* POSSESSIVE0 serves as the iteration counter when a count is needed. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of the loop body: one match of the referenced text. */
  label = LABEL();
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    /* Increment the counter. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: loop again without saving a position. */
    if (min > 1)
      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Leave the loop at the maximum; otherwise push STR_PTR and repeat. */
      jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) is first cleared and later receives STR_PTR;
STACK(1) is the iteration counter, which OP_CRMINSTAR never reads. */
allocate_stack(common, 2);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Zero iterations are acceptable: record the position and skip the first
  match of the reference. */
  zerolength = compile_ref_checks(common, ccbegin, NULL);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  jump = JUMP(SLJIT_JUMP);
  }
else
  zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);

/* Entry point for one more iteration. */
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Fail once the counter has reached the maximum. */
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the iteration and keep looping until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
5338:
5339: static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5340: {
5341: DEFINE_COMPILER;
5342: backtrack_common *backtrack;
5343: recurse_entry *entry = common->entries;
5344: recurse_entry *prev = NULL;
5345: sljit_sw start = GET(cc, 1);
5346: pcre_uchar *start_cc;
5347: BOOL needs_control_head;
5348:
5349: PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5350:
5351: /* Inlining simple patterns. */
5352: if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5353: {
5354: start_cc = common->start + start;
5355: compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5356: BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5357: return cc + 1 + LINK_SIZE;
5358: }
5359:
5360: while (entry != NULL)
5361: {
5362: if (entry->start == start)
5363: break;
5364: prev = entry;
5365: entry = entry->next;
5366: }
5367:
5368: if (entry == NULL)
5369: {
5370: entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5371: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5372: return NULL;
5373: entry->next = NULL;
5374: entry->entry = NULL;
5375: entry->calls = NULL;
5376: entry->start = start;
5377:
5378: if (prev != NULL)
5379: prev->next = entry;
5380: else
5381: common->entries = entry;
5382: }
5383:
5384: if (common->has_set_som && common->mark_ptr != 0)
5385: {
5386: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5387: allocate_stack(common, 2);
5388: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5389: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5390: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5391: }
5392: else if (common->has_set_som || common->mark_ptr != 0)
5393: {
5394: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5395: allocate_stack(common, 1);
5396: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5397: }
5398:
5399: if (entry->entry == NULL)
5400: add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5401: else
5402: JUMPTO(SLJIT_FAST_CALL, entry->entry);
5403: /* Leave if the match is failed. */
5404: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5405: return cc + 1 + LINK_SIZE;
5406: }
5407:
5408: static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5409: {
5410: const pcre_uchar *begin = arguments->begin;
5411: int *offset_vector = arguments->offsets;
5412: int offset_count = arguments->offset_count;
5413: int i;
5414:
5415: if (PUBL(callout) == NULL)
5416: return 0;
5417:
5418: callout_block->version = 2;
5419: callout_block->callout_data = arguments->callout_data;
5420:
5421: /* Offsets in subject. */
5422: callout_block->subject_length = arguments->end - arguments->begin;
5423: callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5424: callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5425: #if defined COMPILE_PCRE8
5426: callout_block->subject = (PCRE_SPTR)begin;
5427: #elif defined COMPILE_PCRE16
5428: callout_block->subject = (PCRE_SPTR16)begin;
5429: #elif defined COMPILE_PCRE32
5430: callout_block->subject = (PCRE_SPTR32)begin;
5431: #endif
5432:
5433: /* Convert and copy the JIT offset vector to the offset_vector array. */
5434: callout_block->capture_top = 0;
5435: callout_block->offset_vector = offset_vector;
5436: for (i = 2; i < offset_count; i += 2)
5437: {
5438: offset_vector[i] = jit_ovector[i] - begin;
5439: offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5440: if (jit_ovector[i] >= begin)
5441: callout_block->capture_top = i;
5442: }
5443:
5444: callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5445: if (offset_count > 0)
5446: offset_vector[0] = -1;
5447: if (offset_count > 1)
5448: offset_vector[1] = -1;
5449: return (*PUBL(callout))(callout_block);
5450: }
5451:
/* Size in bytes of the area reserved for a callout block: the size of the
block rounded up to a multiple of 8. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block field `arg` measured from the end of the
reserved area (so the value is negative): -CALLOUT_ARG_SIZE plus the offset of
the field inside the block. */
#define CALLOUT_ARG_OFFSET(arg) \
    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5458:
5459: static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5460: {
5461: DEFINE_COMPILER;
5462: backtrack_common *backtrack;
5463:
5464: PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5465:
5466: allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5467:
5468: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5469: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5470: SLJIT_ASSERT(common->capture_last_ptr != 0);
5471: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5472: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5473:
5474: /* These pointer sized fields temporarly stores internal variables. */
5475: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5476: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5477: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5478:
5479: if (common->mark_ptr != 0)
5480: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5481: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5482: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5483: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5484:
5485: /* Needed to save important temporary registers. */
5486: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5487: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5488: GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5489: sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5490: OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5491: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5492: free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5493:
5494: /* Check return value. */
5495: OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5496: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5497: if (common->forced_quit_label == NULL)
5498: add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5499: else
5500: JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5501: return cc + 2 + 2 * LINK_SIZE;
5502: }
5503:
5504: #undef CALLOUT_ARG_SIZE
5505: #undef CALLOUT_ARG_OFFSET
5506:
5507: static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5508: {
5509: DEFINE_COMPILER;
5510: int framesize;
5511: int extrasize;
5512: BOOL needs_control_head;
5513: int private_data_ptr;
5514: backtrack_common altbacktrack;
5515: pcre_uchar *ccbegin;
5516: pcre_uchar opcode;
5517: pcre_uchar bra = OP_BRA;
5518: jump_list *tmp = NULL;
5519: jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5520: jump_list **found;
5521: /* Saving previous accept variables. */
5522: BOOL save_local_exit = common->local_exit;
5523: BOOL save_positive_assert = common->positive_assert;
5524: then_trap_backtrack *save_then_trap = common->then_trap;
5525: struct sljit_label *save_quit_label = common->quit_label;
5526: struct sljit_label *save_accept_label = common->accept_label;
5527: jump_list *save_quit = common->quit;
5528: jump_list *save_positive_assert_quit = common->positive_assert_quit;
5529: jump_list *save_accept = common->accept;
5530: struct sljit_jump *jump;
5531: struct sljit_jump *brajump = NULL;
5532:
5533: /* Assert captures then. */
5534: common->then_trap = NULL;
5535:
5536: if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5537: {
5538: SLJIT_ASSERT(!conditional);
5539: bra = *cc;
5540: cc++;
5541: }
5542: private_data_ptr = PRIVATE_DATA(cc);
5543: SLJIT_ASSERT(private_data_ptr != 0);
5544: framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5545: backtrack->framesize = framesize;
5546: backtrack->private_data_ptr = private_data_ptr;
5547: opcode = *cc;
5548: SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5549: found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5550: ccbegin = cc;
5551: cc += GET(cc, 1);
5552:
5553: if (bra == OP_BRAMINZERO)
5554: {
5555: /* This is a braminzero backtrack path. */
5556: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5557: free_stack(common, 1);
5558: brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5559: }
5560:
5561: if (framesize < 0)
5562: {
5563: extrasize = needs_control_head ? 2 : 1;
5564: if (framesize == no_frame)
5565: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5566: allocate_stack(common, extrasize);
5567: if (needs_control_head)
5568: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5569: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5570: if (needs_control_head)
5571: {
5572: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5573: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5574: }
5575: }
5576: else
5577: {
5578: extrasize = needs_control_head ? 3 : 2;
5579: allocate_stack(common, framesize + extrasize);
5580: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5581: OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5582: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5583: if (needs_control_head)
5584: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5585: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5586: if (needs_control_head)
5587: {
5588: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5589: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5590: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5591: }
5592: else
5593: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5594: init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5595: }
5596:
5597: memset(&altbacktrack, 0, sizeof(backtrack_common));
5598: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5599: {
5600: /* Negative assert is stronger than positive assert. */
5601: common->local_exit = TRUE;
5602: common->quit_label = NULL;
5603: common->quit = NULL;
5604: common->positive_assert = FALSE;
5605: }
5606: else
5607: common->positive_assert = TRUE;
5608: common->positive_assert_quit = NULL;
5609:
5610: while (1)
5611: {
5612: common->accept_label = NULL;
5613: common->accept = NULL;
5614: altbacktrack.top = NULL;
5615: altbacktrack.topbacktracks = NULL;
5616:
5617: if (*ccbegin == OP_ALT)
5618: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5619:
5620: altbacktrack.cc = ccbegin;
5621: compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5622: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5623: {
5624: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5625: {
5626: common->local_exit = save_local_exit;
5627: common->quit_label = save_quit_label;
5628: common->quit = save_quit;
5629: }
5630: common->positive_assert = save_positive_assert;
5631: common->then_trap = save_then_trap;
5632: common->accept_label = save_accept_label;
5633: common->positive_assert_quit = save_positive_assert_quit;
5634: common->accept = save_accept;
5635: return NULL;
5636: }
5637: common->accept_label = LABEL();
5638: if (common->accept != NULL)
5639: set_jumps(common->accept, common->accept_label);
5640:
5641: /* Reset stack. */
5642: if (framesize < 0)
5643: {
5644: if (framesize == no_frame)
5645: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5646: else
5647: free_stack(common, extrasize);
5648: if (needs_control_head)
5649: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5650: }
5651: else
5652: {
5653: if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5654: {
5655: /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5656: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5657: if (needs_control_head)
5658: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5659: }
5660: else
5661: {
5662: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5663: if (needs_control_head)
5664: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5665: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5666: }
5667: }
5668:
5669: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5670: {
5671: /* We know that STR_PTR was stored on the top of the stack. */
5672: if (conditional)
5673: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5674: else if (bra == OP_BRAZERO)
5675: {
5676: if (framesize < 0)
5677: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5678: else
5679: {
5680: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5681: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5682: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5683: }
5684: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5685: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5686: }
5687: else if (framesize >= 0)
5688: {
5689: /* For OP_BRA and OP_BRAMINZERO. */
5690: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5691: }
5692: }
5693: add_jump(compiler, found, JUMP(SLJIT_JUMP));
5694:
5695: compile_backtrackingpath(common, altbacktrack.top);
5696: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5697: {
5698: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5699: {
5700: common->local_exit = save_local_exit;
5701: common->quit_label = save_quit_label;
5702: common->quit = save_quit;
5703: }
5704: common->positive_assert = save_positive_assert;
5705: common->then_trap = save_then_trap;
5706: common->accept_label = save_accept_label;
5707: common->positive_assert_quit = save_positive_assert_quit;
5708: common->accept = save_accept;
5709: return NULL;
5710: }
5711: set_jumps(altbacktrack.topbacktracks, LABEL());
5712:
5713: if (*cc != OP_ALT)
5714: break;
5715:
5716: ccbegin = cc;
5717: cc += GET(cc, 1);
5718: }
5719:
5720: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5721: {
5722: SLJIT_ASSERT(common->positive_assert_quit == NULL);
5723: /* Makes the check less complicated below. */
5724: common->positive_assert_quit = common->quit;
5725: }
5726:
5727: /* None of them matched. */
5728: if (common->positive_assert_quit != NULL)
5729: {
5730: jump = JUMP(SLJIT_JUMP);
5731: set_jumps(common->positive_assert_quit, LABEL());
5732: SLJIT_ASSERT(framesize != no_stack);
5733: if (framesize < 0)
5734: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5735: else
5736: {
5737: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5738: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5739: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5740: }
5741: JUMPHERE(jump);
5742: }
5743:
5744: if (needs_control_head)
5745: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5746:
5747: if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5748: {
5749: /* Assert is failed. */
5750: if (conditional || bra == OP_BRAZERO)
5751: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5752:
5753: if (framesize < 0)
5754: {
5755: /* The topmost item should be 0. */
5756: if (bra == OP_BRAZERO)
5757: {
5758: if (extrasize == 2)
5759: free_stack(common, 1);
5760: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5761: }
5762: else
5763: free_stack(common, extrasize);
5764: }
5765: else
5766: {
5767: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5768: /* The topmost item should be 0. */
5769: if (bra == OP_BRAZERO)
5770: {
5771: free_stack(common, framesize + extrasize - 1);
5772: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5773: }
5774: else
5775: free_stack(common, framesize + extrasize);
5776: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5777: }
5778: jump = JUMP(SLJIT_JUMP);
5779: if (bra != OP_BRAZERO)
5780: add_jump(compiler, target, jump);
5781:
5782: /* Assert is successful. */
5783: set_jumps(tmp, LABEL());
5784: if (framesize < 0)
5785: {
5786: /* We know that STR_PTR was stored on the top of the stack. */
5787: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5788: /* Keep the STR_PTR on the top of the stack. */
5789: if (bra == OP_BRAZERO)
5790: {
5791: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5792: if (extrasize == 2)
5793: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5794: }
5795: else if (bra == OP_BRAMINZERO)
5796: {
5797: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5798: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5799: }
5800: }
5801: else
5802: {
5803: if (bra == OP_BRA)
5804: {
5805: /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5806: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5807: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5808: }
5809: else
5810: {
5811: /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5812: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5813: if (extrasize == 2)
5814: {
5815: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5816: if (bra == OP_BRAMINZERO)
5817: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5818: }
5819: else
5820: {
5821: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5822: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5823: }
5824: }
5825: }
5826:
5827: if (bra == OP_BRAZERO)
5828: {
5829: backtrack->matchingpath = LABEL();
5830: SET_LABEL(jump, backtrack->matchingpath);
5831: }
5832: else if (bra == OP_BRAMINZERO)
5833: {
5834: JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5835: JUMPHERE(brajump);
5836: if (framesize >= 0)
5837: {
5838: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5839: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5840: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5841: }
5842: set_jumps(backtrack->common.topbacktracks, LABEL());
5843: }
5844: }
5845: else
5846: {
5847: /* AssertNot is successful. */
5848: if (framesize < 0)
5849: {
5850: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5851: if (bra != OP_BRA)
5852: {
5853: if (extrasize == 2)
5854: free_stack(common, 1);
5855: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5856: }
5857: else
5858: free_stack(common, extrasize);
5859: }
5860: else
5861: {
5862: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5863: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5864: /* The topmost item should be 0. */
5865: if (bra != OP_BRA)
5866: {
5867: free_stack(common, framesize + extrasize - 1);
5868: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5869: }
5870: else
5871: free_stack(common, framesize + extrasize);
5872: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5873: }
5874:
5875: if (bra == OP_BRAZERO)
5876: backtrack->matchingpath = LABEL();
5877: else if (bra == OP_BRAMINZERO)
5878: {
5879: JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5880: JUMPHERE(brajump);
5881: }
5882:
5883: if (bra != OP_BRA)
5884: {
5885: SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5886: set_jumps(backtrack->common.topbacktracks, LABEL());
5887: backtrack->common.topbacktracks = NULL;
5888: }
5889: }
5890:
5891: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5892: {
5893: common->local_exit = save_local_exit;
5894: common->quit_label = save_quit_label;
5895: common->quit = save_quit;
5896: }
5897: common->positive_assert = save_positive_assert;
5898: common->then_trap = save_then_trap;
5899: common->accept_label = save_accept_label;
5900: common->positive_assert_quit = save_positive_assert_quit;
5901: common->accept = save_accept;
5902: return cc + 1 + LINK_SIZE;
5903: }
5904:
5905: static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5906: {
5907: int condition = FALSE;
5908: pcre_uchar *slotA = name_table;
5909: pcre_uchar *slotB;
5910: sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5911: sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5912: sljit_sw no_capture;
5913: int i;
5914:
5915: locals += refno & 0xff;
5916: refno >>= 8;
5917: no_capture = locals[1];
5918:
5919: for (i = 0; i < name_count; i++)
5920: {
5921: if (GET2(slotA, 0) == refno) break;
5922: slotA += name_entry_size;
5923: }
5924:
5925: if (i < name_count)
5926: {
5927: /* Found a name for the number - there can be only one; duplicate names
5928: for different numbers are allowed, but not vice versa. First scan down
5929: for duplicates. */
5930:
5931: slotB = slotA;
5932: while (slotB > name_table)
5933: {
5934: slotB -= name_entry_size;
5935: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5936: {
5937: condition = locals[GET2(slotB, 0) << 1] != no_capture;
5938: if (condition) break;
5939: }
5940: else break;
5941: }
5942:
5943: /* Scan up for duplicates */
5944: if (!condition)
5945: {
5946: slotB = slotA;
5947: for (i++; i < name_count; i++)
5948: {
5949: slotB += name_entry_size;
5950: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5951: {
5952: condition = locals[GET2(slotB, 0) << 1] != no_capture;
5953: if (condition) break;
5954: }
5955: else break;
5956: }
5957: }
5958: }
5959: return condition;
5960: }
5961:
5962: static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5963: {
5964: int condition = FALSE;
5965: pcre_uchar *slotA = name_table;
5966: pcre_uchar *slotB;
5967: sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5968: sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5969: sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5970: sljit_uw i;
5971:
5972: for (i = 0; i < name_count; i++)
5973: {
5974: if (GET2(slotA, 0) == recno) break;
5975: slotA += name_entry_size;
5976: }
5977:
5978: if (i < name_count)
5979: {
5980: /* Found a name for the number - there can be only one; duplicate
5981: names for different numbers are allowed, but not vice versa. First
5982: scan down for duplicates. */
5983:
5984: slotB = slotA;
5985: while (slotB > name_table)
5986: {
5987: slotB -= name_entry_size;
5988: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5989: {
5990: condition = GET2(slotB, 0) == group_num;
5991: if (condition) break;
5992: }
5993: else break;
5994: }
5995:
5996: /* Scan up for duplicates */
5997: if (!condition)
5998: {
5999: slotB = slotA;
6000: for (i++; i < name_count; i++)
6001: {
6002: slotB += name_entry_size;
6003: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6004: {
6005: condition = GET2(slotB, 0) == group_num;
6006: if (condition) break;
6007: }
6008: else break;
6009: }
6010: }
6011: }
6012: return condition;
6013: }
6014:
6015: static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6016: {
6017: DEFINE_COMPILER;
6018: int stacksize;
6019:
6020: if (framesize < 0)
6021: {
6022: if (framesize == no_frame)
6023: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6024: else
6025: {
6026: stacksize = needs_control_head ? 1 : 0;
6027: if (ket != OP_KET || has_alternatives)
6028: stacksize++;
6029: free_stack(common, stacksize);
6030: }
6031:
6032: if (needs_control_head)
6033: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6034:
6035: /* TMP2 which is set here used by OP_KETRMAX below. */
6036: if (ket == OP_KETRMAX)
6037: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6038: else if (ket == OP_KETRMIN)
6039: {
6040: /* Move the STR_PTR to the private_data_ptr. */
6041: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6042: }
6043: }
6044: else
6045: {
6046: stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6047: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6048: if (needs_control_head)
6049: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6050:
6051: if (ket == OP_KETRMAX)
6052: {
6053: /* TMP2 which is set here used by OP_KETRMAX below. */
6054: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6055: }
6056: }
6057: if (needs_control_head)
6058: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6059: }
6060:
6061: static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6062: {
6063: DEFINE_COMPILER;
6064:
6065: if (common->capture_last_ptr != 0)
6066: {
6067: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6068: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6069: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6070: stacksize++;
6071: }
6072: if (common->optimized_cbracket[offset >> 1] == 0)
6073: {
6074: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6075: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6076: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6077: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6078: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6079: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6080: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6081: stacksize += 2;
6082: }
6083: return stacksize;
6084: }
6085:
6086: /*
6087: Handling bracketed expressions is probably the most complex part.
6088:
6089: Stack layout naming characters:
6090: S - Push the current STR_PTR
6091: 0 - Push a 0 (NULL)
6092: A - Push the current STR_PTR. Needed for restoring the STR_PTR
6093: before the next alternative. Not pushed if there are no alternatives.
6094: M - Any values pushed by the current alternative. Can be empty, or anything.
6095: C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6096: L - Push the previous local (pointed by localptr) to the stack
6097: () - optional values stored on the stack
6098: ()* - optional, can be stored multiple times
6099:
6100: The following list shows the regular expression templates, their PCRE byte codes
6101: and stack layout supported by pcre-sljit.
6102:
6103: (?:) OP_BRA | OP_KET A M
6104: () OP_CBRA | OP_KET C M
6105: (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6106: OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6107: (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6108: OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6109: ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6110: OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6111: ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6112: OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6113: (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6114: (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6115: ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6116: ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6117: (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6118: OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6119: (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6120: OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6121: ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6122: OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6123: ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6124: OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6125:
6126:
6127: Stack layout naming characters:
6128: A - Push the alternative index (starting from 0) on the stack.
6129: Not pushed if there are no alternatives.
6130: M - Any values pushed by the current alternative. Can be empty, or anything.
6131:
6132: The next list shows the possible content of a bracket:
6133: (|) OP_*BRA | OP_ALT ... M A
6134: (?()|) OP_*COND | OP_ALT M A
6135: (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6136: (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6137: Or nothing, if trace is unnecessary
6138: */
6139:
6140: static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6141: {
6142: DEFINE_COMPILER;
6143: backtrack_common *backtrack;
6144: pcre_uchar opcode;
6145: int private_data_ptr = 0;
6146: int offset = 0;
6147: int stacksize;
6148: int repeat_ptr = 0, repeat_length = 0;
6149: int repeat_type = 0, repeat_count = 0;
6150: pcre_uchar *ccbegin;
6151: pcre_uchar *matchingpath;
6152: pcre_uchar bra = OP_BRA;
6153: pcre_uchar ket;
6154: assert_backtrack *assert;
6155: BOOL has_alternatives;
6156: BOOL needs_control_head = FALSE;
6157: struct sljit_jump *jump;
6158: struct sljit_jump *skip;
6159: struct sljit_label *rmax_label = NULL;
6160: struct sljit_jump *braminzero = NULL;
6161:
6162: PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6163:
6164: if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6165: {
6166: bra = *cc;
6167: cc++;
6168: opcode = *cc;
6169: }
6170:
6171: opcode = *cc;
6172: ccbegin = cc;
6173: matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6174: ket = *matchingpath;
6175: if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6176: {
6177: repeat_ptr = PRIVATE_DATA(matchingpath);
6178: repeat_length = PRIVATE_DATA(matchingpath + 1);
6179: repeat_type = PRIVATE_DATA(matchingpath + 2);
6180: repeat_count = PRIVATE_DATA(matchingpath + 3);
6181: SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6182: if (repeat_type == OP_UPTO)
6183: ket = OP_KETRMAX;
6184: if (repeat_type == OP_MINUPTO)
6185: ket = OP_KETRMIN;
6186: }
6187:
6188: if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6189: {
6190: /* Drop this bracket_backtrack. */
6191: parent->top = backtrack->prev;
6192: return matchingpath + 1 + LINK_SIZE + repeat_length;
6193: }
6194:
6195: matchingpath = ccbegin + 1 + LINK_SIZE;
6196: SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6197: SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6198: cc += GET(cc, 1);
6199:
6200: has_alternatives = *cc == OP_ALT;
6201: if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6202: {
6203: has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6204: if (*matchingpath == OP_NRREF)
6205: {
6206: stacksize = GET2(matchingpath, 1);
6207: if (common->currententry == NULL || stacksize == RREF_ANY)
6208: has_alternatives = FALSE;
6209: else if (common->currententry->start == 0)
6210: has_alternatives = stacksize != 0;
6211: else
6212: has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6213: }
6214: }
6215:
6216: if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6217: opcode = OP_SCOND;
6218: if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6219: opcode = OP_ONCE;
6220:
6221: if (opcode == OP_CBRA || opcode == OP_SCBRA)
6222: {
6223: /* Capturing brackets has a pre-allocated space. */
6224: offset = GET2(ccbegin, 1 + LINK_SIZE);
6225: if (common->optimized_cbracket[offset] == 0)
6226: {
6227: private_data_ptr = OVECTOR_PRIV(offset);
6228: offset <<= 1;
6229: }
6230: else
6231: {
6232: offset <<= 1;
6233: private_data_ptr = OVECTOR(offset);
6234: }
6235: BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6236: matchingpath += IMM2_SIZE;
6237: }
6238: else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6239: {
6240: /* Other brackets simply allocate the next entry. */
6241: private_data_ptr = PRIVATE_DATA(ccbegin);
6242: SLJIT_ASSERT(private_data_ptr != 0);
6243: BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6244: if (opcode == OP_ONCE)
6245: BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6246: }
6247:
6248: /* Instructions before the first alternative. */
6249: stacksize = 0;
6250: if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6251: stacksize++;
6252: if (bra == OP_BRAZERO)
6253: stacksize++;
6254:
6255: if (stacksize > 0)
6256: allocate_stack(common, stacksize);
6257:
6258: stacksize = 0;
6259: if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6260: {
6261: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6262: stacksize++;
6263: }
6264:
6265: if (bra == OP_BRAZERO)
6266: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6267:
6268: if (bra == OP_BRAMINZERO)
6269: {
6270: /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6271: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6272: if (ket != OP_KETRMIN)
6273: {
6274: free_stack(common, 1);
6275: braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6276: }
6277: else
6278: {
6279: if (opcode == OP_ONCE || opcode >= OP_SBRA)
6280: {
6281: jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6282: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6283: /* Nothing stored during the first run. */
6284: skip = JUMP(SLJIT_JUMP);
6285: JUMPHERE(jump);
6286: /* Checking zero-length iteration. */
6287: if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6288: {
6289: /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6290: braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6291: }
6292: else
6293: {
6294: /* Except when the whole stack frame must be saved. */
6295: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6296: braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6297: }
6298: JUMPHERE(skip);
6299: }
6300: else
6301: {
6302: jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6303: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6304: JUMPHERE(jump);
6305: }
6306: }
6307: }
6308:
6309: if (repeat_type != 0)
6310: {
6311: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6312: if (repeat_type == OP_EXACT)
6313: rmax_label = LABEL();
6314: }
6315:
6316: if (ket == OP_KETRMIN)
6317: BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6318:
6319: if (ket == OP_KETRMAX)
6320: {
6321: rmax_label = LABEL();
6322: if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6323: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6324: }
6325:
6326: /* Handling capturing brackets and alternatives. */
6327: if (opcode == OP_ONCE)
6328: {
6329: stacksize = 0;
6330: if (needs_control_head)
6331: {
6332: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6333: stacksize++;
6334: }
6335:
6336: if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6337: {
6338: /* Neither capturing brackets nor recursions are found in the block. */
6339: if (ket == OP_KETRMIN)
6340: {
6341: stacksize += 2;
6342: if (!needs_control_head)
6343: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6344: }
6345: else
6346: {
6347: if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6348: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6349: if (ket == OP_KETRMAX || has_alternatives)
6350: stacksize++;
6351: }
6352:
6353: if (stacksize > 0)
6354: allocate_stack(common, stacksize);
6355:
6356: stacksize = 0;
6357: if (needs_control_head)
6358: {
6359: stacksize++;
6360: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6361: }
6362:
6363: if (ket == OP_KETRMIN)
6364: {
6365: if (needs_control_head)
6366: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6367: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6368: if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6369: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6370: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6371: }
6372: else if (ket == OP_KETRMAX || has_alternatives)
6373: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6374: }
6375: else
6376: {
6377: if (ket != OP_KET || has_alternatives)
6378: stacksize++;
6379:
6380: stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6381: allocate_stack(common, stacksize);
6382:
6383: if (needs_control_head)
6384: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6385:
6386: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6387: OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6388:
6389: stacksize = needs_control_head ? 1 : 0;
6390: if (ket != OP_KET || has_alternatives)
6391: {
6392: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6393: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6394: stacksize++;
6395: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6396: }
6397: else
6398: {
6399: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6400: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6401: }
6402: init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6403: }
6404: }
6405: else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6406: {
6407: /* Saving the previous values. */
6408: if (common->optimized_cbracket[offset >> 1] != 0)
6409: {
6410: SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6411: allocate_stack(common, 2);
6412: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6413: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6414: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6415: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6416: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6417: }
6418: else
6419: {
6420: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6421: allocate_stack(common, 1);
6422: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6423: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6424: }
6425: }
6426: else if (opcode == OP_SBRA || opcode == OP_SCOND)
6427: {
6428: /* Saving the previous value. */
6429: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6430: allocate_stack(common, 1);
6431: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6432: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6433: }
6434: else if (has_alternatives)
6435: {
6436: /* Pushing the starting string pointer. */
6437: allocate_stack(common, 1);
6438: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6439: }
6440:
6441: /* Generating code for the first alternative. */
6442: if (opcode == OP_COND || opcode == OP_SCOND)
6443: {
6444: if (*matchingpath == OP_CREF)
6445: {
6446: SLJIT_ASSERT(has_alternatives);
6447: add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6448: CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6449: matchingpath += 1 + IMM2_SIZE;
6450: }
6451: else if (*matchingpath == OP_NCREF)
6452: {
6453: SLJIT_ASSERT(has_alternatives);
6454: stacksize = GET2(matchingpath, 1);
6455: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6456:
6457: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6458: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6459: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6460: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6461: GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6462: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6463: sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6464: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6465: add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6466:
6467: JUMPHERE(jump);
6468: matchingpath += 1 + IMM2_SIZE;
6469: }
6470: else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6471: {
6472: /* Never has other case. */
6473: BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6474:
6475: stacksize = GET2(matchingpath, 1);
6476: if (common->currententry == NULL)
6477: stacksize = 0;
6478: else if (stacksize == RREF_ANY)
6479: stacksize = 1;
6480: else if (common->currententry->start == 0)
6481: stacksize = stacksize == 0;
6482: else
6483: stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6484:
6485: if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6486: {
6487: SLJIT_ASSERT(!has_alternatives);
6488: if (stacksize != 0)
6489: matchingpath += 1 + IMM2_SIZE;
6490: else
6491: {
6492: if (*cc == OP_ALT)
6493: {
6494: matchingpath = cc + 1 + LINK_SIZE;
6495: cc += GET(cc, 1);
6496: }
6497: else
6498: matchingpath = cc;
6499: }
6500: }
6501: else
6502: {
6503: SLJIT_ASSERT(has_alternatives);
6504:
6505: stacksize = GET2(matchingpath, 1);
6506: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6507: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6508: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6509: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6510: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6511: GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6512: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6513: sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6514: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6515: add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6516: matchingpath += 1 + IMM2_SIZE;
6517: }
6518: }
6519: else
6520: {
6521: SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6522: /* Similar code as PUSH_BACKTRACK macro. */
6523: assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6524: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6525: return NULL;
6526: memset(assert, 0, sizeof(assert_backtrack));
6527: assert->common.cc = matchingpath;
6528: BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6529: matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6530: }
6531: }
6532:
6533: compile_matchingpath(common, matchingpath, cc, backtrack);
6534: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6535: return NULL;
6536:
6537: if (opcode == OP_ONCE)
6538: match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6539:
6540: stacksize = 0;
6541: if (repeat_type == OP_MINUPTO)
6542: {
6543: /* We need to preserve the counter. TMP2 will be used below. */
6544: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6545: stacksize++;
6546: }
6547: if (ket != OP_KET || bra != OP_BRA)
6548: stacksize++;
6549: if (offset != 0)
6550: {
6551: if (common->capture_last_ptr != 0)
6552: stacksize++;
6553: if (common->optimized_cbracket[offset >> 1] == 0)
6554: stacksize += 2;
6555: }
6556: if (has_alternatives && opcode != OP_ONCE)
6557: stacksize++;
6558:
6559: if (stacksize > 0)
6560: allocate_stack(common, stacksize);
6561:
6562: stacksize = 0;
6563: if (repeat_type == OP_MINUPTO)
6564: {
6565: /* TMP2 was set above. */
6566: OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6567: stacksize++;
6568: }
6569:
6570: if (ket != OP_KET || bra != OP_BRA)
6571: {
6572: if (ket != OP_KET)
6573: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6574: else
6575: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6576: stacksize++;
6577: }
6578:
6579: if (offset != 0)
6580: stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6581:
6582: if (has_alternatives)
6583: {
6584: if (opcode != OP_ONCE)
6585: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6586: if (ket != OP_KETRMAX)
6587: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6588: }
6589:
6590: /* Must be after the matchingpath label. */
6591: if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6592: {
6593: SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6594: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6595: }
6596:
6597: if (ket == OP_KETRMAX)
6598: {
6599: if (repeat_type != 0)
6600: {
6601: if (has_alternatives)
6602: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6603: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6604: JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6605: /* Drop STR_PTR for greedy plus quantifier. */
6606: if (opcode != OP_ONCE)
6607: free_stack(common, 1);
6608: }
6609: else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6610: {
6611: if (has_alternatives)
6612: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6613: /* Checking zero-length iteration. */
6614: if (opcode != OP_ONCE)
6615: {
6616: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6617: /* Drop STR_PTR for greedy plus quantifier. */
6618: if (bra != OP_BRAZERO)
6619: free_stack(common, 1);
6620: }
6621: else
6622: /* TMP2 must contain the starting STR_PTR. */
6623: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
6624: }
6625: else
6626: JUMPTO(SLJIT_JUMP, rmax_label);
6627: BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6628: }
6629:
6630: if (repeat_type == OP_EXACT)
6631: {
6632: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6633: JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6634: }
6635: else if (repeat_type == OP_UPTO)
6636: {
6637: /* We need to preserve the counter. */
6638: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6639: allocate_stack(common, 1);
6640: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6641: }
6642:
6643: if (bra == OP_BRAZERO)
6644: BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6645:
6646: if (bra == OP_BRAMINZERO)
6647: {
6648: /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6649: JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6650: if (braminzero != NULL)
6651: {
6652: JUMPHERE(braminzero);
6653: /* We need to release the end pointer to perform the
6654: backtrack for the zero-length iteration. When
6655: framesize is < 0, OP_ONCE will do the release itself. */
6656: if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6657: {
6658: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6659: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6660: }
6661: else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6662: free_stack(common, 1);
6663: }
6664: /* Continue to the normal backtrack. */
6665: }
6666:
6667: if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6668: count_match(common);
6669:
6670: /* Skip the other alternatives. */
6671: while (*cc == OP_ALT)
6672: cc += GET(cc, 1);
6673: cc += 1 + LINK_SIZE;
6674:
6675: /* Temporarily encoding the needs_control_head in framesize. */
6676: if (opcode == OP_ONCE)
6677: BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6678: return cc + repeat_length;
6679: }
6680:
6681: static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6682: {
6683: DEFINE_COMPILER;
6684: backtrack_common *backtrack;
6685: pcre_uchar opcode;
6686: int private_data_ptr;
6687: int cbraprivptr = 0;
6688: BOOL needs_control_head;
6689: int framesize;
6690: int stacksize;
6691: int offset = 0;
6692: BOOL zero = FALSE;
6693: pcre_uchar *ccbegin = NULL;
6694: int stack; /* Also contains the offset of control head. */
6695: struct sljit_label *loop = NULL;
6696: struct jump_list *emptymatch = NULL;
6697:
6698: PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6699: if (*cc == OP_BRAPOSZERO)
6700: {
6701: zero = TRUE;
6702: cc++;
6703: }
6704:
6705: opcode = *cc;
6706: private_data_ptr = PRIVATE_DATA(cc);
6707: SLJIT_ASSERT(private_data_ptr != 0);
6708: BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6709: switch(opcode)
6710: {
6711: case OP_BRAPOS:
6712: case OP_SBRAPOS:
6713: ccbegin = cc + 1 + LINK_SIZE;
6714: break;
6715:
6716: case OP_CBRAPOS:
6717: case OP_SCBRAPOS:
6718: offset = GET2(cc, 1 + LINK_SIZE);
6719: /* This case cannot be optimized in the same was as
6720: normal capturing brackets. */
6721: SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6722: cbraprivptr = OVECTOR_PRIV(offset);
6723: offset <<= 1;
6724: ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6725: break;
6726:
6727: default:
6728: SLJIT_ASSERT_STOP();
6729: break;
6730: }
6731:
6732: framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6733: BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6734: if (framesize < 0)
6735: {
6736: if (offset != 0)
6737: {
6738: stacksize = 2;
6739: if (common->capture_last_ptr != 0)
6740: stacksize++;
6741: }
6742: else
6743: stacksize = 1;
6744:
6745: if (needs_control_head)
6746: stacksize++;
6747: if (!zero)
6748: stacksize++;
6749:
6750: BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6751: allocate_stack(common, stacksize);
6752: if (framesize == no_frame)
6753: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6754:
6755: stack = 0;
6756: if (offset != 0)
6757: {
6758: stack = 2;
6759: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6760: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6761: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6762: if (common->capture_last_ptr != 0)
6763: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6764: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6765: if (needs_control_head)
6766: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6767: if (common->capture_last_ptr != 0)
6768: {
6769: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6770: stack = 3;
6771: }
6772: }
6773: else
6774: {
6775: if (needs_control_head)
6776: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6777: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6778: stack = 1;
6779: }
6780:
6781: if (needs_control_head)
6782: stack++;
6783: if (!zero)
6784: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6785: if (needs_control_head)
6786: {
6787: stack--;
6788: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6789: }
6790: }
6791: else
6792: {
6793: stacksize = framesize + 1;
6794: if (!zero)
6795: stacksize++;
6796: if (needs_control_head)
6797: stacksize++;
6798: if (offset == 0)
6799: stacksize++;
6800: BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6801:
6802: allocate_stack(common, stacksize);
6803: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6804: if (needs_control_head)
6805: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6806: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6807:
6808: stack = 0;
6809: if (!zero)
6810: {
6811: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6812: stack = 1;
6813: }
6814: if (needs_control_head)
6815: {
6816: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6817: stack++;
6818: }
6819: if (offset == 0)
6820: {
6821: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6822: stack++;
6823: }
6824: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6825: init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
6826: stack -= 1 + (offset == 0);
6827: }
6828:
6829: if (offset != 0)
6830: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6831:
6832: loop = LABEL();
6833: while (*cc != OP_KETRPOS)
6834: {
6835: backtrack->top = NULL;
6836: backtrack->topbacktracks = NULL;
6837: cc += GET(cc, 1);
6838:
6839: compile_matchingpath(common, ccbegin, cc, backtrack);
6840: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6841: return NULL;
6842:
6843: if (framesize < 0)
6844: {
6845: if (framesize == no_frame)
6846: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6847:
6848: if (offset != 0)
6849: {
6850: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6851: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6852: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6853: if (common->capture_last_ptr != 0)
6854: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6855: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6856: }
6857: else
6858: {
6859: if (opcode == OP_SBRAPOS)
6860: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6861: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6862: }
6863:
6864: if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6865: add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6866:
6867: if (!zero)
6868: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6869: }
6870: else
6871: {
6872: if (offset != 0)
6873: {
6874: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6875: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6876: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6877: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6878: if (common->capture_last_ptr != 0)
6879: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6880: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6881: }
6882: else
6883: {
6884: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6885: OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6886: if (opcode == OP_SBRAPOS)
6887: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6888: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6889: }
6890:
6891: if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6892: add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6893:
6894: if (!zero)
6895: {
6896: if (framesize < 0)
6897: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6898: else
6899: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6900: }
6901: }
6902:
6903: if (needs_control_head)
6904: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
6905:
6906: JUMPTO(SLJIT_JUMP, loop);
6907: flush_stubs(common);
6908:
6909: compile_backtrackingpath(common, backtrack->top);
6910: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6911: return NULL;
6912: set_jumps(backtrack->topbacktracks, LABEL());
6913:
6914: if (framesize < 0)
6915: {
6916: if (offset != 0)
6917: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6918: else
6919: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6920: }
6921: else
6922: {
6923: if (offset != 0)
6924: {
6925: /* Last alternative. */
6926: if (*cc == OP_KETRPOS)
6927: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6928: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6929: }
6930: else
6931: {
6932: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6933: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6934: }
6935: }
6936:
6937: if (*cc == OP_KETRPOS)
6938: break;
6939: ccbegin = cc + 1 + LINK_SIZE;
6940: }
6941:
6942: /* We don't have to restore the control head in case of a failed match. */
6943:
6944: backtrack->topbacktracks = NULL;
6945: if (!zero)
6946: {
6947: if (framesize < 0)
6948: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6949: else /* TMP2 is set to [private_data_ptr] above. */
6950: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6951: }
6952:
6953: /* None of them matched. */
6954: set_jumps(emptymatch, LABEL());
6955: count_match(common);
6956: return cc + 1 + LINK_SIZE;
6957: }
6958:
6959: static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6960: {
6961: int class_len;
6962:
6963: *opcode = *cc;
6964: if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6965: {
6966: cc++;
6967: *type = OP_CHAR;
6968: }
6969: else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6970: {
6971: cc++;
6972: *type = OP_CHARI;
6973: *opcode -= OP_STARI - OP_STAR;
6974: }
6975: else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6976: {
6977: cc++;
6978: *type = OP_NOT;
6979: *opcode -= OP_NOTSTAR - OP_STAR;
6980: }
6981: else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6982: {
6983: cc++;
6984: *type = OP_NOTI;
6985: *opcode -= OP_NOTSTARI - OP_STAR;
6986: }
6987: else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6988: {
6989: cc++;
6990: *opcode -= OP_TYPESTAR - OP_STAR;
6991: *type = 0;
6992: }
6993: else
6994: {
6995: SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6996: *type = *opcode;
6997: cc++;
6998: class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6999: *opcode = cc[class_len - 1];
7000: if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7001: {
7002: *opcode -= OP_CRSTAR - OP_STAR;
7003: if (end != NULL)
7004: *end = cc + class_len;
7005: }
7006: else
7007: {
7008: SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
7009: *arg1 = GET2(cc, (class_len + IMM2_SIZE));
7010: *arg2 = GET2(cc, class_len);
7011:
7012: if (*arg2 == 0)
7013: {
7014: SLJIT_ASSERT(*arg1 != 0);
7015: *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
7016: }
7017: if (*arg1 == *arg2)
7018: *opcode = OP_EXACT;
7019:
7020: if (end != NULL)
7021: *end = cc + class_len + 2 * IMM2_SIZE;
7022: }
7023: return cc;
7024: }
7025:
7026: if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7027: {
7028: *arg1 = GET2(cc, 0);
7029: cc += IMM2_SIZE;
7030: }
7031:
7032: if (*type == 0)
7033: {
7034: *type = *cc;
7035: if (end != NULL)
7036: *end = next_opcode(common, cc);
7037: cc++;
7038: return cc;
7039: }
7040:
7041: if (end != NULL)
7042: {
7043: *end = cc + 1;
7044: #ifdef SUPPORT_UTF
7045: if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7046: #endif
7047: }
7048: return cc;
7049: }
7050:
7051: static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7052: {
7053: DEFINE_COMPILER;
7054: backtrack_common *backtrack;
7055: pcre_uchar opcode;
7056: pcre_uchar type;
7057: int arg1 = -1, arg2 = -1;
7058: pcre_uchar* end;
7059: jump_list *nomatch = NULL;
7060: struct sljit_jump *jump = NULL;
7061: struct sljit_label *label;
7062: int private_data_ptr = PRIVATE_DATA(cc);
7063: int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
7064: int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7065: int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7066: int tmp_base, tmp_offset;
7067:
7068: PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7069:
7070: cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
7071:
7072: switch(type)
7073: {
7074: case OP_NOT_DIGIT:
7075: case OP_DIGIT:
7076: case OP_NOT_WHITESPACE:
7077: case OP_WHITESPACE:
7078: case OP_NOT_WORDCHAR:
7079: case OP_WORDCHAR:
7080: case OP_ANY:
7081: case OP_ALLANY:
7082: case OP_ANYBYTE:
7083: case OP_ANYNL:
7084: case OP_NOT_HSPACE:
7085: case OP_HSPACE:
7086: case OP_NOT_VSPACE:
7087: case OP_VSPACE:
7088: case OP_CHAR:
7089: case OP_CHARI:
7090: case OP_NOT:
7091: case OP_NOTI:
7092: case OP_CLASS:
7093: case OP_NCLASS:
7094: tmp_base = TMP3;
7095: tmp_offset = 0;
7096: break;
7097:
7098: default:
7099: SLJIT_ASSERT_STOP();
7100: /* Fall through. */
7101:
7102: case OP_EXTUNI:
7103: case OP_XCLASS:
7104: case OP_NOTPROP:
7105: case OP_PROP:
7106: tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
7107: tmp_offset = POSSESSIVE0;
7108: break;
7109: }
7110:
7111: switch(opcode)
7112: {
7113: case OP_STAR:
7114: case OP_PLUS:
7115: case OP_UPTO:
7116: case OP_CRRANGE:
7117: if (type == OP_ANYNL || type == OP_EXTUNI)
7118: {
7119: SLJIT_ASSERT(private_data_ptr == 0);
7120: if (opcode == OP_STAR || opcode == OP_UPTO)
7121: {
7122: allocate_stack(common, 2);
7123: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7124: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7125: }
7126: else
7127: {
7128: allocate_stack(common, 1);
7129: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7130: }
7131:
7132: if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7133: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
7134:
7135: label = LABEL();
7136: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7137: if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7138: {
7139: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
7140: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7141: if (opcode == OP_CRRANGE && arg2 > 0)
7142: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
7143: if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
7144: jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
7145: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
7146: }
7147:
7148: /* We cannot use TMP3 because of this allocate_stack. */
7149: allocate_stack(common, 1);
7150: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7151: JUMPTO(SLJIT_JUMP, label);
7152: if (jump != NULL)
7153: JUMPHERE(jump);
7154: }
7155: else
7156: {
7157: if (opcode == OP_PLUS)
7158: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7159: if (private_data_ptr == 0)
7160: allocate_stack(common, 2);
7161: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7162: if (opcode <= OP_PLUS)
7163: OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
7164: else
7165: OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
7166: label = LABEL();
7167: compile_char1_matchingpath(common, type, cc, &nomatch);
7168: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7169: if (opcode <= OP_PLUS)
7170: JUMPTO(SLJIT_JUMP, label);
7171: else if (opcode == OP_CRRANGE && arg1 == 0)
7172: {
7173: OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
7174: JUMPTO(SLJIT_JUMP, label);
7175: }
7176: else
7177: {
7178: OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7179: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7180: OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7181: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
7182: }
7183: set_jumps(nomatch, LABEL());
7184: if (opcode == OP_CRRANGE)
7185: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
7186: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7187: }
7188: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7189: break;
7190:
7191: case OP_MINSTAR:
7192: case OP_MINPLUS:
7193: if (opcode == OP_MINPLUS)
7194: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7195: if (private_data_ptr == 0)
7196: allocate_stack(common, 1);
7197: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7198: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7199: break;
7200:
7201: case OP_MINUPTO:
7202: case OP_CRMINRANGE:
7203: if (private_data_ptr == 0)
7204: allocate_stack(common, 2);
7205: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7206: OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
7207: if (opcode == OP_CRMINRANGE)
7208: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
7209: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7210: break;
7211:
7212: case OP_QUERY:
7213: case OP_MINQUERY:
7214: if (private_data_ptr == 0)
7215: allocate_stack(common, 1);
7216: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7217: if (opcode == OP_QUERY)
7218: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7219: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7220: break;
7221:
7222: case OP_EXACT:
7223: OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
7224: label = LABEL();
7225: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7226: OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
7227: JUMPTO(SLJIT_C_NOT_ZERO, label);
7228: break;
7229:
7230: case OP_POSSTAR:
7231: case OP_POSPLUS:
7232: case OP_POSUPTO:
7233: if (opcode == OP_POSPLUS)
7234: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7235: if (opcode == OP_POSUPTO)
7236: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
7237: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7238: label = LABEL();
7239: compile_char1_matchingpath(common, type, cc, &nomatch);
7240: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7241: if (opcode != OP_POSUPTO)
7242: JUMPTO(SLJIT_JUMP, label);
7243: else
7244: {
7245: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
7246: JUMPTO(SLJIT_C_NOT_ZERO, label);
7247: }
7248: set_jumps(nomatch, LABEL());
7249: OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
7250: break;
7251:
7252: case OP_POSQUERY:
7253: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7254: compile_char1_matchingpath(common, type, cc, &nomatch);
7255: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7256: set_jumps(nomatch, LABEL());
7257: OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
7258: break;
7259:
7260: default:
7261: SLJIT_ASSERT_STOP();
7262: break;
7263: }
7264:
7265: count_match(common);
7266: return end;
7267: }
7268:
7269: static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7270: {
7271: DEFINE_COMPILER;
7272: backtrack_common *backtrack;
7273:
7274: PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7275:
7276: if (*cc == OP_FAIL)
7277: {
7278: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
7279: return cc + 1;
7280: }
7281:
7282: if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
7283: {
7284: /* No need to check notempty conditions. */
7285: if (common->accept_label == NULL)
7286: add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
7287: else
7288: JUMPTO(SLJIT_JUMP, common->accept_label);
7289: return cc + 1;
7290: }
7291:
7292: if (common->accept_label == NULL)
7293: add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
7294: else
7295: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
7296: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7297: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
7298: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7299: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
7300: if (common->accept_label == NULL)
7301: add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7302: else
7303: CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
7304: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7305: if (common->accept_label == NULL)
7306: add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
7307: else
7308: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
7309: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
7310: return cc + 1;
7311: }
7312:
7313: static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
7314: {
7315: DEFINE_COMPILER;
7316: int offset = GET2(cc, 1);
7317: BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
7318:
7319: /* Data will be discarded anyway... */
7320: if (common->currententry != NULL)
7321: return cc + 1 + IMM2_SIZE;
7322:
7323: if (!optimized_cbracket)
7324: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
7325: offset <<= 1;
7326: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
7327: if (!optimized_cbracket)
7328: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
7329: return cc + 1 + IMM2_SIZE;
7330: }
7331:
7332: static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7333: {
7334: DEFINE_COMPILER;
7335: backtrack_common *backtrack;
7336: pcre_uchar opcode = *cc;
7337: pcre_uchar *ccend = cc + 1;
7338:
7339: if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
7340: ccend += 2 + cc[1];
7341:
7342: PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7343:
7344: if (opcode == OP_SKIP)
7345: {
7346: allocate_stack(common, 1);
7347: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7348: return ccend;
7349: }
7350:
7351: if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
7352: {
7353: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7354: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
7355: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
7356: OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
7357: }
7358:
7359: return ccend;
7360: }
7361:
7362: static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
7363:
7364: static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
7365: {
7366: DEFINE_COMPILER;
7367: backtrack_common *backtrack;
7368: BOOL needs_control_head;
7369: int size;
7370:
7371: PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
7372: common->then_trap = BACKTRACK_AS(then_trap_backtrack);
7373: BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
7374: BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
7375: BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
7376:
7377: size = BACKTRACK_AS(then_trap_backtrack)->framesize;
7378: size = 3 + (size < 0 ? 0 : size);
7379:
7380: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7381: allocate_stack(common, size);
7382: if (size > 3)
7383: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
7384: else
7385: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
7386: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
7387: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
7388: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
7389:
7390: size = BACKTRACK_AS(then_trap_backtrack)->framesize;
7391: if (size >= 0)
7392: init_frame(common, cc, ccend, size - 1, 0, FALSE);
7393: }
7394:
7395: static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
7396: {
7397: DEFINE_COMPILER;
7398: backtrack_common *backtrack;
7399: BOOL has_then_trap = FALSE;
7400: then_trap_backtrack *save_then_trap = NULL;
7401:
7402: SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
7403:
7404: if (common->has_then && common->then_offsets[cc - common->start] != 0)
7405: {
7406: SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
7407: has_then_trap = TRUE;
7408: save_then_trap = common->then_trap;
7409: /* Tail item on backtrack. */
7410: compile_then_trap_matchingpath(common, cc, ccend, parent);
7411: }
7412:
7413: while (cc < ccend)
7414: {
7415: switch(*cc)
7416: {
7417: case OP_SOD:
7418: case OP_SOM:
7419: case OP_NOT_WORD_BOUNDARY:
7420: case OP_WORD_BOUNDARY:
7421: case OP_NOT_DIGIT:
7422: case OP_DIGIT:
7423: case OP_NOT_WHITESPACE:
7424: case OP_WHITESPACE:
7425: case OP_NOT_WORDCHAR:
7426: case OP_WORDCHAR:
7427: case OP_ANY:
7428: case OP_ALLANY:
7429: case OP_ANYBYTE:
7430: case OP_NOTPROP:
7431: case OP_PROP:
7432: case OP_ANYNL:
7433: case OP_NOT_HSPACE:
7434: case OP_HSPACE:
7435: case OP_NOT_VSPACE:
7436: case OP_VSPACE:
7437: case OP_EXTUNI:
7438: case OP_EODN:
7439: case OP_EOD:
7440: case OP_CIRC:
7441: case OP_CIRCM:
7442: case OP_DOLL:
7443: case OP_DOLLM:
7444: case OP_NOT:
7445: case OP_NOTI:
7446: case OP_REVERSE:
7447: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
7448: break;
7449:
7450: case OP_SET_SOM:
7451: PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
7452: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
7453: allocate_stack(common, 1);
7454: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
7455: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7456: cc++;
7457: break;
7458:
7459: case OP_CHAR:
7460: case OP_CHARI:
7461: if (common->mode == JIT_COMPILE)
7462: cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
7463: else
7464: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
7465: break;
7466:
7467: case OP_STAR:
7468: case OP_MINSTAR:
7469: case OP_PLUS:
7470: case OP_MINPLUS:
7471: case OP_QUERY:
7472: case OP_MINQUERY:
7473: case OP_UPTO:
7474: case OP_MINUPTO:
7475: case OP_EXACT:
7476: case OP_POSSTAR:
7477: case OP_POSPLUS:
7478: case OP_POSQUERY:
7479: case OP_POSUPTO:
7480: case OP_STARI:
7481: case OP_MINSTARI:
7482: case OP_PLUSI:
7483: case OP_MINPLUSI:
7484: case OP_QUERYI:
7485: case OP_MINQUERYI:
7486: case OP_UPTOI:
7487: case OP_MINUPTOI:
7488: case OP_EXACTI:
7489: case OP_POSSTARI:
7490: case OP_POSPLUSI:
7491: case OP_POSQUERYI:
7492: case OP_POSUPTOI:
7493: case OP_NOTSTAR:
7494: case OP_NOTMINSTAR:
7495: case OP_NOTPLUS:
7496: case OP_NOTMINPLUS:
7497: case OP_NOTQUERY:
7498: case OP_NOTMINQUERY:
7499: case OP_NOTUPTO:
7500: case OP_NOTMINUPTO:
7501: case OP_NOTEXACT:
7502: case OP_NOTPOSSTAR:
7503: case OP_NOTPOSPLUS:
7504: case OP_NOTPOSQUERY:
7505: case OP_NOTPOSUPTO:
7506: case OP_NOTSTARI:
7507: case OP_NOTMINSTARI:
7508: case OP_NOTPLUSI:
7509: case OP_NOTMINPLUSI:
7510: case OP_NOTQUERYI:
7511: case OP_NOTMINQUERYI:
7512: case OP_NOTUPTOI:
7513: case OP_NOTMINUPTOI:
7514: case OP_NOTEXACTI:
7515: case OP_NOTPOSSTARI:
7516: case OP_NOTPOSPLUSI:
7517: case OP_NOTPOSQUERYI:
7518: case OP_NOTPOSUPTOI:
7519: case OP_TYPESTAR:
7520: case OP_TYPEMINSTAR:
7521: case OP_TYPEPLUS:
7522: case OP_TYPEMINPLUS:
7523: case OP_TYPEQUERY:
7524: case OP_TYPEMINQUERY:
7525: case OP_TYPEUPTO:
7526: case OP_TYPEMINUPTO:
7527: case OP_TYPEEXACT:
7528: case OP_TYPEPOSSTAR:
7529: case OP_TYPEPOSPLUS:
7530: case OP_TYPEPOSQUERY:
7531: case OP_TYPEPOSUPTO:
7532: cc = compile_iterator_matchingpath(common, cc, parent);
7533: break;
7534:
7535: case OP_CLASS:
7536: case OP_NCLASS:
7537: if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
7538: cc = compile_iterator_matchingpath(common, cc, parent);
7539: else
7540: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
7541: break;
7542:
7543: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
7544: case OP_XCLASS:
7545: if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
7546: cc = compile_iterator_matchingpath(common, cc, parent);
7547: else
7548: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
7549: break;
7550: #endif
7551:
7552: case OP_REF:
7553: case OP_REFI:
7554: if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
7555: cc = compile_ref_iterator_matchingpath(common, cc, parent);
7556: else
7557: cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
7558: break;
7559:
7560: case OP_RECURSE:
7561: cc = compile_recurse_matchingpath(common, cc, parent);
7562: break;
7563:
7564: case OP_CALLOUT:
7565: cc = compile_callout_matchingpath(common, cc, parent);
7566: break;
7567:
7568: case OP_ASSERT:
7569: case OP_ASSERT_NOT:
7570: case OP_ASSERTBACK:
7571: case OP_ASSERTBACK_NOT:
7572: PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
7573: cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
7574: break;
7575:
7576: case OP_BRAMINZERO:
7577: PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
7578: cc = bracketend(cc + 1);
7579: if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
7580: {
7581: allocate_stack(common, 1);
7582: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7583: }
7584: else
7585: {
7586: allocate_stack(common, 2);
7587: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7588: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
7589: }
7590: BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
7591: if (cc[1] > OP_ASSERTBACK_NOT)
7592: count_match(common);
7593: break;
7594:
7595: case OP_ONCE:
7596: case OP_ONCE_NC:
7597: case OP_BRA:
7598: case OP_CBRA:
7599: case OP_COND:
7600: case OP_SBRA:
7601: case OP_SCBRA:
7602: case OP_SCOND:
7603: cc = compile_bracket_matchingpath(common, cc, parent);
7604: break;
7605:
7606: case OP_BRAZERO:
7607: if (cc[1] > OP_ASSERTBACK_NOT)
7608: cc = compile_bracket_matchingpath(common, cc, parent);
7609: else
7610: {
7611: PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
7612: cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
7613: }
7614: break;
7615:
7616: case OP_BRAPOS:
7617: case OP_CBRAPOS:
7618: case OP_SBRAPOS:
7619: case OP_SCBRAPOS:
7620: case OP_BRAPOSZERO:
7621: cc = compile_bracketpos_matchingpath(common, cc, parent);
7622: break;
7623:
7624: case OP_MARK:
7625: PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
7626: SLJIT_ASSERT(common->mark_ptr != 0);
7627: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
7628: allocate_stack(common, common->has_skip_arg ? 5 : 1);
7629: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7630: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
7631: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
7632: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
7633: OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
7634: if (common->has_skip_arg)
7635: {
7636: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7637: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
7638: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
7639: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
7640: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
7641: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7642: }
7643: cc += 1 + 2 + cc[1];
7644: break;
7645:
7646: case OP_PRUNE:
7647: case OP_PRUNE_ARG:
7648: case OP_SKIP:
7649: case OP_SKIP_ARG:
7650: case OP_THEN:
7651: case OP_THEN_ARG:
7652: case OP_COMMIT:
7653: cc = compile_control_verb_matchingpath(common, cc, parent);
7654: break;
7655:
7656: case OP_FAIL:
7657: case OP_ACCEPT:
7658: case OP_ASSERT_ACCEPT:
7659: cc = compile_fail_accept_matchingpath(common, cc, parent);
7660: break;
7661:
7662: case OP_CLOSE:
7663: cc = compile_close_matchingpath(common, cc);
7664: break;
7665:
7666: case OP_SKIPZERO:
7667: cc = bracketend(cc + 1);
7668: break;
7669:
7670: default:
7671: SLJIT_ASSERT_STOP();
7672: return;
7673: }
7674: if (cc == NULL)
7675: return;
7676: }
7677:
7678: if (has_then_trap)
7679: {
7680: /* Head item on backtrack. */
7681: PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
7682: BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
7683: BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
7684: common->then_trap = save_then_trap;
7685: }
7686: SLJIT_ASSERT(cc == ccend);
7687: }
7688:
/* The helper macros used by the matching path generators above are undefined
here, so the backtracking path generators below cannot use them. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() on the given backtrack record and returns
from the enclosing function when sljit_get_compiler_error() reports an error.
The macro expects `common` and `compiler` to be in scope and, because of the
bare `return`, can only be used inside functions returning void. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named `current` (which must be in scope at the point of
use) to a pointer to the given backtrack record type. */
#define CURRENT_AS(type) ((type *)current)
7703:
7704: static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
7705: {
7706: DEFINE_COMPILER;
7707: pcre_uchar *cc = current->cc;
7708: pcre_uchar opcode;
7709: pcre_uchar type;
7710: int arg1 = -1, arg2 = -1;
7711: struct sljit_label *label = NULL;
7712: struct sljit_jump *jump = NULL;
7713: jump_list *jumplist = NULL;
7714: int private_data_ptr = PRIVATE_DATA(cc);
7715: int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
7716: int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7717: int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7718:
7719: cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
7720:
7721: switch(opcode)
7722: {
7723: case OP_STAR:
7724: case OP_PLUS:
7725: case OP_UPTO:
7726: case OP_CRRANGE:
7727: if (type == OP_ANYNL || type == OP_EXTUNI)
7728: {
7729: SLJIT_ASSERT(private_data_ptr == 0);
7730: set_jumps(current->topbacktracks, LABEL());
7731: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7732: free_stack(common, 1);
7733: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
7734: }
7735: else
7736: {
7737: if (opcode == OP_UPTO)
7738: arg2 = 0;
7739: if (opcode <= OP_PLUS)
7740: {
7741: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7742: jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
7743: }
7744: else
7745: {
7746: OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7747: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7748: jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
7749: OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
7750: }
7751: skip_char_back(common);
7752: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7753: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
7754: if (opcode == OP_CRRANGE)
7755: set_jumps(current->topbacktracks, LABEL());
7756: JUMPHERE(jump);
7757: if (private_data_ptr == 0)
7758: free_stack(common, 2);
7759: if (opcode == OP_PLUS)
7760: set_jumps(current->topbacktracks, LABEL());
7761: }
7762: break;
7763:
7764: case OP_MINSTAR:
7765: case OP_MINPLUS:
7766: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7767: compile_char1_matchingpath(common, type, cc, &jumplist);
7768: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7769: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
7770: set_jumps(jumplist, LABEL());
7771: if (private_data_ptr == 0)
7772: free_stack(common, 1);
7773: if (opcode == OP_MINPLUS)
7774: set_jumps(current->topbacktracks, LABEL());
7775: break;
7776:
7777: case OP_MINUPTO:
7778: case OP_CRMINRANGE:
7779: if (opcode == OP_CRMINRANGE)
7780: {
7781: label = LABEL();
7782: set_jumps(current->topbacktracks, label);
7783: }
7784: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7785: compile_char1_matchingpath(common, type, cc, &jumplist);
7786:
7787: OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7788: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7789: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7790: OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7791:
7792: if (opcode == OP_CRMINRANGE)
7793: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
7794:
7795: if (opcode == OP_CRMINRANGE && arg1 == 0)
7796: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
7797: else
7798: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
7799:
7800: set_jumps(jumplist, LABEL());
7801: if (private_data_ptr == 0)
7802: free_stack(common, 2);
7803: break;
7804:
7805: case OP_QUERY:
7806: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7807: OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
7808: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
7809: jump = JUMP(SLJIT_JUMP);
7810: set_jumps(current->topbacktracks, LABEL());
7811: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7812: OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
7813: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
7814: JUMPHERE(jump);
7815: if (private_data_ptr == 0)
7816: free_stack(common, 1);
7817: break;
7818:
7819: case OP_MINQUERY:
7820: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7821: OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
7822: jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7823: compile_char1_matchingpath(common, type, cc, &jumplist);
7824: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
7825: set_jumps(jumplist, LABEL());
7826: JUMPHERE(jump);
7827: if (private_data_ptr == 0)
7828: free_stack(common, 1);
7829: break;
7830:
7831: case OP_EXACT:
7832: case OP_POSPLUS:
7833: set_jumps(current->topbacktracks, LABEL());
7834: break;
7835:
7836: case OP_POSSTAR:
7837: case OP_POSQUERY:
7838: case OP_POSUPTO:
7839: break;
7840:
7841: default:
7842: SLJIT_ASSERT_STOP();
7843: break;
7844: }
7845: }
7846:
7847: static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
7848: {
7849: DEFINE_COMPILER;
7850: pcre_uchar *cc = current->cc;
7851: pcre_uchar type;
7852:
7853: type = cc[1 + IMM2_SIZE];
7854: if ((type & 0x1) == 0)
7855: {
7856: set_jumps(current->topbacktracks, LABEL());
7857: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7858: free_stack(common, 1);
7859: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
7860: return;
7861: }
7862:
7863: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7864: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
7865: set_jumps(current->topbacktracks, LABEL());
7866: free_stack(common, 2);
7867: }
7868:
7869: static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
7870: {
7871: DEFINE_COMPILER;
7872:
7873: if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
7874: compile_backtrackingpath(common, current->top);
7875: set_jumps(current->topbacktracks, LABEL());
7876: if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
7877: return;
7878:
7879: if (common->has_set_som && common->mark_ptr != 0)
7880: {
7881: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7882: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7883: free_stack(common, 2);
7884: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
7885: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
7886: }
7887: else if (common->has_set_som || common->mark_ptr != 0)
7888: {
7889: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7890: free_stack(common, 1);
7891: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
7892: }
7893: }
7894:
7895: static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
7896: {
7897: DEFINE_COMPILER;
7898: pcre_uchar *cc = current->cc;
7899: pcre_uchar bra = OP_BRA;
7900: struct sljit_jump *brajump = NULL;
7901:
7902: SLJIT_ASSERT(*cc != OP_BRAMINZERO);
7903: if (*cc == OP_BRAZERO)
7904: {
7905: bra = *cc;
7906: cc++;
7907: }
7908:
7909: if (bra == OP_BRAZERO)
7910: {
7911: SLJIT_ASSERT(current->topbacktracks == NULL);
7912: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7913: }
7914:
7915: if (CURRENT_AS(assert_backtrack)->framesize < 0)
7916: {
7917: set_jumps(current->topbacktracks, LABEL());
7918:
7919: if (bra == OP_BRAZERO)
7920: {
7921: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7922: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
7923: free_stack(common, 1);
7924: }
7925: return;
7926: }
7927:
7928: if (bra == OP_BRAZERO)
7929: {
7930: if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
7931: {
7932: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7933: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
7934: free_stack(common, 1);
7935: return;
7936: }
7937: free_stack(common, 1);
7938: brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7939: }
7940:
7941: if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
7942: {
7943: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr);
7944: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7945: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
7946:
7947: set_jumps(current->topbacktracks, LABEL());
7948: }
7949: else
7950: set_jumps(current->topbacktracks, LABEL());
7951:
7952: if (bra == OP_BRAZERO)
7953: {
7954: /* We know there is enough place on the stack. */
7955: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7956: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7957: JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
7958: JUMPHERE(brajump);
7959: }
7960: }
7961:
/* Emits the backtracking path of a bracket construct.

The opcodes explicitly distinguished here are OP_CBRA/OP_SCBRA (their bracket
number is read from the pattern), OP_ONCE/OP_ONCE_NC, OP_COND/OP_SCOND and
OP_SBRA; the bracket may be preceded by OP_BRAZERO or OP_BRAMINZERO. The
closing ket opcode, together with a counted repeat stored in the private data
of an OP_KET, selects the repeat handling.

The function returns early, leaving the generated code incomplete, when a
jump list item cannot be allocated or when the sljit compiler reports an
error.

Arguments:
  common      the compiler state
  current     the backtrack record of the bracket (a bracket_backtrack)
*/
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, count;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
pcre_uchar *cc = current->cc;
pcre_uchar *ccbegin;
pcre_uchar *ccprev;
jump_list *jumplist = NULL;
jump_list *jumplistitem = NULL;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;

/* Remember and skip a leading OP_BRAZERO / OP_BRAMINZERO. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily addresses the ket opcode closing the bracket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
/* An OP_KET with non-zero private data carries a counted repeat. For
OP_UPTO and OP_MINUPTO the ket is handled like OP_KETRMAX and OP_KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here ccbegin is the bracket opcode and cc the end of its first branch. */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* For conditional brackets has_alternatives is set when the condition is an
assertion or when the backtrack record has a condfailed jump list. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* offset is twice the bracket number read from the pattern. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* Normalize the opcode: a repeated OP_COND is treated as OP_SCOND and
OP_ONCE_NC as OP_ONCE. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  /* The lowest bit is the flag; the stored framesize is shifted back in place. */
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Pop the saved repeat counter and write it back to its private data slot
(incremented by one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
  }

/* Entry code, selected by the (possibly adjusted) ket and the bra prefix. */
if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* A zero popped here skips everything up to JUMPHERE(brazero) below. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        /* With a saved frame the value to compare with is read relative to
        the pointer stored in the private data slot. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
        CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* The OP_KETRMIN exit code at the end of this function jumps back here. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* A non-zero popped value skips everything up to JUMPHERE(brazero) below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* Restart the counter at 1; the loop is closed at the end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Restore the values saved on the stack for a capturing bracket. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    /* Three slots: the capture_last value and the two OVECTOR entries. */
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    /* Two slots: the two OVECTOR entries. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    }
  /* Jumps over the code generated below; resolved by JUMPHERE(once). */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
    if (SLJIT_UNLIKELY(!jumplistitem))
      return;
    jumplist = jumplistitem;
    jumplistitem->next = NULL;
    jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
    }
  }
else if (*cc == OP_ALT)
  {
  /* Build a jump list. Get the last successfully matched branch index. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  count = 1;
  /* One compare-and-jump per OP_ALT, testing the index against 1, 2, ... */
  do
    {
    /* Append as the last item. */
    if (jumplist != NULL)
      {
      jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
      jumplistitem = jumplistitem->next;
      }
    else
      {
      jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
      jumplist = jumplistitem;
      }

    if (SLJIT_UNLIKELY(!jumplistitem))
      return;

    jumplistitem->next = NULL;
    jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
    cc += GET(cc, 1);
    }
  while (*cc == OP_ALT);

  /* Rewind cc to the end of the first branch. */
  cc = ccbegin + GET(ccbegin, 1);
  }

/* Backtracking path of the records collected under this bracket so far. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* A positive assertion with a saved frame has its frame reverted here. */
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    /* Skip the alternative code below; the condfailed jumps land on it. */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  count = 1;
  /* One iteration per remaining alternative: generate its matching path, the
  code executed after it matches, and then its backtracking path. */
  do
    {
    /* The record is reused for collecting the backtracks of each alternative. */
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      /* Reload the saved STR_PTR before trying the alternative (not done
      for conditional brackets). */
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots; stacksize is now the index of the next slot. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      /* Repeated brackets store STR_PTR; otherwise (only a bra prefix) zero. */
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Store the index of this alternative; it is the value compared by the
    jump list built above. */
    if (opcode != OP_ONCE)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Resolve the jump list item belonging to this alternative here. */
    if (opcode != OP_ONCE)
      {
      SLJIT_ASSERT(jumplist);
      JUMPHERE(jumplist->jump);
      jumplist = jumplist->next;
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);
  SLJIT_ASSERT(!jumplist);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* A negative assertion with a saved frame has its frame reverted here. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Restore the state saved when the bracket was entered. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  cc = ccbegin + GET(ccbegin, 1);
  /* Number of stack slots to release for the OP_ONCE bracket. */
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }
  free_stack(common, stacksize);

  /* Target of the jump emitted for OP_ONCE before the alternatives. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
    }
  }

/* Exit code: decide whether another iteration has to be retried. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop back while it is not above repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  /* A non-zero saved value continues at recursive_matchingpath. */
  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. The extra release would
  affect badly the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  /* A non-zero saved value loops back to rmin_label. */
  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
8418:
8419: static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8420: {
8421: DEFINE_COMPILER;
8422: int offset;
8423: struct sljit_jump *jump;
8424:
8425: if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
8426: {
8427: if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
8428: {
8429: offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
8430: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8431: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8432: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
8433: if (common->capture_last_ptr != 0)
8434: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8435: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
8436: if (common->capture_last_ptr != 0)
8437: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
8438: }
8439: set_jumps(current->topbacktracks, LABEL());
8440: free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
8441: return;
8442: }
8443:
8444: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
8445: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8446:
8447: if (current->topbacktracks)
8448: {
8449: jump = JUMP(SLJIT_JUMP);
8450: set_jumps(current->topbacktracks, LABEL());
8451: /* Drop the stack frame. */
8452: free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
8453: JUMPHERE(jump);
8454: }
8455: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
8456: }
8457:
8458: static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8459: {
8460: assert_backtrack backtrack;
8461:
8462: current->top = NULL;
8463: current->topbacktracks = NULL;
8464: current->nextbacktracks = NULL;
8465: if (current->cc[1] > OP_ASSERTBACK_NOT)
8466: {
8467: /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
8468: compile_bracket_matchingpath(common, current->cc, current);
8469: compile_bracket_backtrackingpath(common, current->top);
8470: }
8471: else
8472: {
8473: memset(&backtrack, 0, sizeof(backtrack));
8474: backtrack.common.cc = current->cc;
8475: backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
8476: /* Manual call of compile_assert_matchingpath. */
8477: compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
8478: }
8479: SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
8480: }
8481:
8482: static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8483: {
8484: DEFINE_COMPILER;
8485: pcre_uchar opcode = *current->cc;
8486: struct sljit_label *loop;
8487: struct sljit_jump *jump;
8488:
8489: if (opcode == OP_THEN || opcode == OP_THEN_ARG)
8490: {
8491: if (common->then_trap != NULL)
8492: {
8493: SLJIT_ASSERT(common->control_head_ptr != 0);
8494:
8495: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
8496: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
8497: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
8498: jump = JUMP(SLJIT_JUMP);
8499:
8500: loop = LABEL();
8501: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
8502: JUMPHERE(jump);
8503: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
8504: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
8505: add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
8506: return;
8507: }
8508: else if (common->positive_assert)
8509: {
8510: add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
8511: return;
8512: }
8513: }
8514:
8515: if (common->local_exit)
8516: {
8517: if (common->quit_label == NULL)
8518: add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
8519: else
8520: JUMPTO(SLJIT_JUMP, common->quit_label);
8521: return;
8522: }
8523:
8524: if (opcode == OP_SKIP_ARG)
8525: {
8526: SLJIT_ASSERT(common->control_head_ptr != 0);
8527: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
8528: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
8529: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
8530: sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
8531: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
8532:
8533: OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
8534: add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
8535: return;
8536: }
8537:
8538: if (opcode == OP_SKIP)
8539: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8540: else
8541: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
8542: add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
8543: }
8544:
8545: static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8546: {
8547: DEFINE_COMPILER;
8548: struct sljit_jump *jump;
8549: int size;
8550:
8551: if (CURRENT_AS(then_trap_backtrack)->then_trap)
8552: {
8553: common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
8554: return;
8555: }
8556:
8557: size = CURRENT_AS(then_trap_backtrack)->framesize;
8558: size = 3 + (size < 0 ? 0 : size);
8559:
8560: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
8561: free_stack(common, size);
8562: jump = JUMP(SLJIT_JUMP);
8563:
8564: set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
8565: /* STACK_TOP is set by THEN. */
8566: if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
8567: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8568: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8569: free_stack(common, 3);
8570:
8571: JUMPHERE(jump);
8572: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
8573: }
8574:
8575: static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8576: {
8577: DEFINE_COMPILER;
8578: then_trap_backtrack *save_then_trap = common->then_trap;
8579:
8580: while (current)
8581: {
8582: if (current->nextbacktracks != NULL)
8583: set_jumps(current->nextbacktracks, LABEL());
8584: switch(*current->cc)
8585: {
8586: case OP_SET_SOM:
8587: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8588: free_stack(common, 1);
8589: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
8590: break;
8591:
8592: case OP_STAR:
8593: case OP_MINSTAR:
8594: case OP_PLUS:
8595: case OP_MINPLUS:
8596: case OP_QUERY:
8597: case OP_MINQUERY:
8598: case OP_UPTO:
8599: case OP_MINUPTO:
8600: case OP_EXACT:
8601: case OP_POSSTAR:
8602: case OP_POSPLUS:
8603: case OP_POSQUERY:
8604: case OP_POSUPTO:
8605: case OP_STARI:
8606: case OP_MINSTARI:
8607: case OP_PLUSI:
8608: case OP_MINPLUSI:
8609: case OP_QUERYI:
8610: case OP_MINQUERYI:
8611: case OP_UPTOI:
8612: case OP_MINUPTOI:
8613: case OP_EXACTI:
8614: case OP_POSSTARI:
8615: case OP_POSPLUSI:
8616: case OP_POSQUERYI:
8617: case OP_POSUPTOI:
8618: case OP_NOTSTAR:
8619: case OP_NOTMINSTAR:
8620: case OP_NOTPLUS:
8621: case OP_NOTMINPLUS:
8622: case OP_NOTQUERY:
8623: case OP_NOTMINQUERY:
8624: case OP_NOTUPTO:
8625: case OP_NOTMINUPTO:
8626: case OP_NOTEXACT:
8627: case OP_NOTPOSSTAR:
8628: case OP_NOTPOSPLUS:
8629: case OP_NOTPOSQUERY:
8630: case OP_NOTPOSUPTO:
8631: case OP_NOTSTARI:
8632: case OP_NOTMINSTARI:
8633: case OP_NOTPLUSI:
8634: case OP_NOTMINPLUSI:
8635: case OP_NOTQUERYI:
8636: case OP_NOTMINQUERYI:
8637: case OP_NOTUPTOI:
8638: case OP_NOTMINUPTOI:
8639: case OP_NOTEXACTI:
8640: case OP_NOTPOSSTARI:
8641: case OP_NOTPOSPLUSI:
8642: case OP_NOTPOSQUERYI:
8643: case OP_NOTPOSUPTOI:
8644: case OP_TYPESTAR:
8645: case OP_TYPEMINSTAR:
8646: case OP_TYPEPLUS:
8647: case OP_TYPEMINPLUS:
8648: case OP_TYPEQUERY:
8649: case OP_TYPEMINQUERY:
8650: case OP_TYPEUPTO:
8651: case OP_TYPEMINUPTO:
8652: case OP_TYPEEXACT:
8653: case OP_TYPEPOSSTAR:
8654: case OP_TYPEPOSPLUS:
8655: case OP_TYPEPOSQUERY:
8656: case OP_TYPEPOSUPTO:
8657: case OP_CLASS:
8658: case OP_NCLASS:
8659: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
8660: case OP_XCLASS:
8661: #endif
8662: compile_iterator_backtrackingpath(common, current);
8663: break;
8664:
8665: case OP_REF:
8666: case OP_REFI:
8667: compile_ref_iterator_backtrackingpath(common, current);
8668: break;
8669:
8670: case OP_RECURSE:
8671: compile_recurse_backtrackingpath(common, current);
8672: break;
8673:
8674: case OP_ASSERT:
8675: case OP_ASSERT_NOT:
8676: case OP_ASSERTBACK:
8677: case OP_ASSERTBACK_NOT:
8678: compile_assert_backtrackingpath(common, current);
8679: break;
8680:
8681: case OP_ONCE:
8682: case OP_ONCE_NC:
8683: case OP_BRA:
8684: case OP_CBRA:
8685: case OP_COND:
8686: case OP_SBRA:
8687: case OP_SCBRA:
8688: case OP_SCOND:
8689: compile_bracket_backtrackingpath(common, current);
8690: break;
8691:
8692: case OP_BRAZERO:
8693: if (current->cc[1] > OP_ASSERTBACK_NOT)
8694: compile_bracket_backtrackingpath(common, current);
8695: else
8696: compile_assert_backtrackingpath(common, current);
8697: break;
8698:
8699: case OP_BRAPOS:
8700: case OP_CBRAPOS:
8701: case OP_SBRAPOS:
8702: case OP_SCBRAPOS:
8703: case OP_BRAPOSZERO:
8704: compile_bracketpos_backtrackingpath(common, current);
8705: break;
8706:
8707: case OP_BRAMINZERO:
8708: compile_braminzero_backtrackingpath(common, current);
8709: break;
8710:
8711: case OP_MARK:
8712: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
8713: if (common->has_skip_arg)
8714: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8715: free_stack(common, common->has_skip_arg ? 5 : 1);
8716: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
8717: if (common->has_skip_arg)
8718: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
8719: break;
8720:
8721: case OP_THEN:
8722: case OP_THEN_ARG:
8723: case OP_PRUNE:
8724: case OP_PRUNE_ARG:
8725: case OP_SKIP:
8726: case OP_SKIP_ARG:
8727: compile_control_verb_backtrackingpath(common, current);
8728: break;
8729:
8730: case OP_COMMIT:
8731: if (!common->local_exit)
8732: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
8733: if (common->quit_label == NULL)
8734: add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
8735: else
8736: JUMPTO(SLJIT_JUMP, common->quit_label);
8737: break;
8738:
8739: case OP_CALLOUT:
8740: case OP_FAIL:
8741: case OP_ACCEPT:
8742: case OP_ASSERT_ACCEPT:
8743: set_jumps(current->topbacktracks, LABEL());
8744: break;
8745:
8746: case OP_THEN_TRAP:
8747: /* A virtual opcode for then traps. */
8748: compile_then_trap_backtrackingpath(common, current);
8749: break;
8750:
8751: default:
8752: SLJIT_ASSERT_STOP();
8753: break;
8754: }
8755: current = current->prev;
8756: }
8757: common->then_trap = save_then_trap;
8758: }
8759:
8760: static SLJIT_INLINE void compile_recurse(compiler_common *common)
8761: {
8762: DEFINE_COMPILER;
8763: pcre_uchar *cc = common->start + common->currententry->start;
8764: pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
8765: pcre_uchar *ccend = bracketend(cc);
8766: BOOL needs_control_head;
8767: int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
8768: int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
8769: int alternativesize;
8770: BOOL needs_frame;
8771: backtrack_common altbacktrack;
8772: struct sljit_jump *jump;
8773:
8774: /* Recurse captures then. */
8775: common->then_trap = NULL;
8776:
8777: SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
8778: needs_frame = framesize >= 0;
8779: if (!needs_frame)
8780: framesize = 0;
8781: alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
8782:
8783: SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
8784: common->currententry->entry = LABEL();
8785: set_jumps(common->currententry->calls, common->currententry->entry);
8786:
8787: sljit_emit_fast_enter(compiler, TMP2, 0);
8788: allocate_stack(common, private_data_size + framesize + alternativesize);
8789: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
8790: copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
8791: if (needs_control_head)
8792: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
8793: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0);
8794: if (needs_frame)
8795: init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
8796:
8797: if (alternativesize > 0)
8798: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8799:
8800: memset(&altbacktrack, 0, sizeof(backtrack_common));
8801: common->quit_label = NULL;
8802: common->accept_label = NULL;
8803: common->quit = NULL;
8804: common->accept = NULL;
8805: altbacktrack.cc = ccbegin;
8806: cc += GET(cc, 1);
8807: while (1)
8808: {
8809: altbacktrack.top = NULL;
8810: altbacktrack.topbacktracks = NULL;
8811:
8812: if (altbacktrack.cc != ccbegin)
8813: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8814:
8815: compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
8816: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8817: return;
8818:
8819: add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8820:
8821: compile_backtrackingpath(common, altbacktrack.top);
8822: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8823: return;
8824: set_jumps(altbacktrack.topbacktracks, LABEL());
8825:
8826: if (*cc != OP_ALT)
8827: break;
8828:
8829: altbacktrack.cc = cc + 1 + LINK_SIZE;
8830: cc += GET(cc, 1);
8831: }
8832:
8833: /* None of them matched. */
8834: OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
8835: jump = JUMP(SLJIT_JUMP);
8836:
8837: if (common->quit != NULL)
8838: {
8839: set_jumps(common->quit, LABEL());
8840: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
8841: if (needs_frame)
8842: {
8843: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
8844: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8845: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
8846: }
8847: OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
8848: common->quit = NULL;
8849: add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
8850: }
8851:
8852: set_jumps(common->accept, LABEL());
8853: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
8854: if (needs_frame)
8855: {
8856: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
8857: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8858: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
8859: }
8860: OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
8861:
8862: JUMPHERE(jump);
8863: if (common->quit != NULL)
8864: set_jumps(common->quit, LABEL());
8865: copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
8866: free_stack(common, private_data_size + framesize + alternativesize);
8867: if (needs_control_head)
8868: {
8869: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
8870: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
8871: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0);
8872: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
8873: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
8874: }
8875: else
8876: {
8877: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
8878: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
8879: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0);
8880: }
8881: sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
8882: }
8883:
8884: #undef COMPILE_BACKTRACKINGPATH
8885: #undef CURRENT_AS
8886:
8887: void
8888: PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
8889: {
8890: struct sljit_compiler *compiler;
8891: backtrack_common rootbacktrack;
8892: compiler_common common_data;
8893: compiler_common *common = &common_data;
8894: const pcre_uint8 *tables = re->tables;
8895: pcre_study_data *study;
8896: int private_data_size;
8897: pcre_uchar *ccend;
8898: executable_functions *functions;
8899: void *executable_func;
8900: sljit_uw executable_size;
8901: struct sljit_label *mainloop_label = NULL;
8902: struct sljit_label *continue_match_label;
8903: struct sljit_label *empty_match_found_label;
8904: struct sljit_label *empty_match_backtrack_label;
8905: struct sljit_label *reset_match_label;
8906: struct sljit_jump *jump;
8907: struct sljit_jump *minlength_check_failed = NULL;
8908: struct sljit_jump *reqbyte_notfound = NULL;
8909: struct sljit_jump *empty_match;
8910: struct sljit_label *quit_label;
8911:
8912: SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
8913: study = extra->study_data;
8914:
8915: if (!tables)
8916: tables = PRIV(default_tables);
8917:
8918: memset(&rootbacktrack, 0, sizeof(backtrack_common));
8919: memset(common, 0, sizeof(compiler_common));
8920: rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
8921:
8922: common->start = rootbacktrack.cc;
8923: common->fcc = tables + fcc_offset;
8924: common->lcc = (sljit_sw)(tables + lcc_offset);
8925: common->mode = mode;
8926: common->nltype = NLTYPE_FIXED;
8927: switch(re->options & PCRE_NEWLINE_BITS)
8928: {
8929: case 0:
8930: /* Compile-time default */
8931: switch(NEWLINE)
8932: {
8933: case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
8934: case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
8935: default: common->newline = NEWLINE; break;
8936: }
8937: break;
8938: case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
8939: case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
8940: case PCRE_NEWLINE_CR+
8941: PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
8942: case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
8943: case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
8944: default: return;
8945: }
8946: if ((re->options & PCRE_BSR_ANYCRLF) != 0)
8947: common->bsr_nltype = NLTYPE_ANYCRLF;
8948: else if ((re->options & PCRE_BSR_UNICODE) != 0)
8949: common->bsr_nltype = NLTYPE_ANY;
8950: else
8951: {
8952: #ifdef BSR_ANYCRLF
8953: common->bsr_nltype = NLTYPE_ANYCRLF;
8954: #else
8955: common->bsr_nltype = NLTYPE_ANY;
8956: #endif
8957: }
8958: common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
8959: common->ctypes = (sljit_sw)(tables + ctypes_offset);
8960: common->digits[0] = -2;
8961: common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset);
8962: common->name_count = re->name_count;
8963: common->name_entry_size = re->name_entry_size;
8964: common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
8965: #ifdef SUPPORT_UTF
8966: /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
8967: common->utf = (re->options & PCRE_UTF8) != 0;
8968: #ifdef SUPPORT_UCP
8969: common->use_ucp = (re->options & PCRE_UCP) != 0;
8970: #endif
8971: #endif /* SUPPORT_UTF */
8972: ccend = bracketend(rootbacktrack.cc);
8973:
8974: /* Calculate the local space size on the stack. */
8975: common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
8976: common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
8977: if (!common->optimized_cbracket)
8978: return;
8979: #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
8980: memset(common->optimized_cbracket, 0, re->top_bracket + 1);
8981: #else
8982: memset(common->optimized_cbracket, 1, re->top_bracket + 1);
8983: #endif
8984:
8985: SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
8986: #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
8987: common->capture_last_ptr = common->ovector_start;
8988: common->ovector_start += sizeof(sljit_sw);
8989: #endif
8990: if (!check_opcode_types(common, rootbacktrack.cc, ccend))
8991: {
8992: SLJIT_FREE(common->optimized_cbracket);
8993: return;
8994: }
8995:
8996: /* Checking flags and updating ovector_start. */
8997: if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
8998: {
8999: common->req_char_ptr = common->ovector_start;
9000: common->ovector_start += sizeof(sljit_sw);
9001: }
9002: if (mode != JIT_COMPILE)
9003: {
9004: common->start_used_ptr = common->ovector_start;
9005: common->ovector_start += sizeof(sljit_sw);
9006: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9007: {
9008: common->hit_start = common->ovector_start;
9009: common->ovector_start += 2 * sizeof(sljit_sw);
9010: }
9011: else
9012: {
9013: SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9014: common->needs_start_ptr = TRUE;
9015: }
9016: }
9017: if ((re->options & PCRE_FIRSTLINE) != 0)
9018: {
9019: common->first_line_end = common->ovector_start;
9020: common->ovector_start += sizeof(sljit_sw);
9021: }
9022: #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9023: common->control_head_ptr = 1;
9024: #endif
9025: if (common->control_head_ptr != 0)
9026: {
9027: common->control_head_ptr = common->ovector_start;
9028: common->ovector_start += sizeof(sljit_sw);
9029: }
9030: if (common->needs_start_ptr && common->has_set_som)
9031: {
9032: /* Saving the real start pointer is necessary. */
9033: common->start_ptr = common->ovector_start;
9034: common->ovector_start += sizeof(sljit_sw);
9035: }
9036: else
9037: common->needs_start_ptr = FALSE;
9038:
9039: /* Aligning ovector to even number of sljit words. */
9040: if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9041: common->ovector_start += sizeof(sljit_sw);
9042:
9043: if (common->start_ptr == 0)
9044: common->start_ptr = OVECTOR(0);
9045:
9046: /* Capturing brackets cannot be optimized if callouts are allowed. */
9047: if (common->capture_last_ptr != 0)
9048: memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9049:
9050: SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9051: common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9052:
9053: common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si));
9054: if (!common->private_data_ptrs)
9055: {
9056: SLJIT_FREE(common->optimized_cbracket);
9057: return;
9058: }
9059: memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));
9060:
9061: private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9062: set_private_data_ptrs(common, &private_data_size, ccend);
9063: if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9064: {
9065: SLJIT_FREE(common->private_data_ptrs);
9066: SLJIT_FREE(common->optimized_cbracket);
9067: return;
9068: }
9069:
9070: if (common->has_then)
9071: {
9072: common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc);
9073: if (!common->then_offsets)
9074: {
9075: SLJIT_FREE(common->optimized_cbracket);
9076: SLJIT_FREE(common->private_data_ptrs);
9077: return;
9078: }
9079: memset(common->then_offsets, 0, ccend - rootbacktrack.cc);
9080: set_then_offsets(common, rootbacktrack.cc, NULL);
9081: }
9082:
9083: compiler = sljit_create_compiler();
9084: if (!compiler)
9085: {
9086: SLJIT_FREE(common->optimized_cbracket);
9087: SLJIT_FREE(common->private_data_ptrs);
9088: if (common->has_then)
9089: SLJIT_FREE(common->then_offsets);
9090: return;
9091: }
9092: common->compiler = compiler;
9093:
9094: /* Main pcre_jit_exec entry. */
9095: sljit_emit_enter(compiler, 1, 5, 5, private_data_size);
9096:
9097: /* Register init. */
9098: reset_ovector(common, (re->top_bracket + 1) * 2);
9099: if (common->req_char_ptr != 0)
9100: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0);
9101:
9102: OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
9103: OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
9104: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9105: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9106: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9107: OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
9108: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9109: OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
9110: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
9111:
9112: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9113: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
9114: if (common->mark_ptr != 0)
9115: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
9116: if (common->control_head_ptr != 0)
9117: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
9118:
9119: /* Main part of the matching */
9120: if ((re->options & PCRE_ANCHORED) == 0)
9121: {
9122: mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
9123: continue_match_label = LABEL();
9124: /* Forward search if possible. */
9125: if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
9126: {
9127: if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
9128: { /* Do nothing */ }
9129: else if ((re->flags & PCRE_FIRSTSET) != 0)
9130: fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
9131: else if ((re->flags & PCRE_STARTLINE) != 0)
9132: fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
9133: else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
9134: fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
9135: }
9136: }
9137: else
9138: continue_match_label = LABEL();
9139:
9140: if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9141: {
9142: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
9143: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
9144: minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0);
9145: }
9146: if (common->req_char_ptr != 0)
9147: reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
9148:
9149: /* Store the current STR_PTR in OVECTOR(0). */
9150: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
9151: /* Copy the limit of allowed recursions. */
9152: OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
9153: if (common->capture_last_ptr != 0)
9154: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
9155:
9156: if (common->needs_start_ptr)
9157: {
9158: SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
9159: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0);
9160: }
9161: else
9162: SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
9163:
9164: /* Copy the beginning of the string. */
9165: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9166: {
9167: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
9168: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
9169: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
9170: JUMPHERE(jump);
9171: }
9172: else if (mode == JIT_PARTIAL_HARD_COMPILE)
9173: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
9174:
9175: compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack);
9176: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9177: {
9178: sljit_free_compiler(compiler);
9179: SLJIT_FREE(common->optimized_cbracket);
9180: SLJIT_FREE(common->private_data_ptrs);
9181: if (common->has_then)
9182: SLJIT_FREE(common->then_offsets);
9183: return;
9184: }
9185:
9186: empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
9187: empty_match_found_label = LABEL();
9188:
9189: common->accept_label = LABEL();
9190: if (common->accept != NULL)
9191: set_jumps(common->accept, common->accept_label);
9192:
9193: /* This means we have a match. Update the ovector. */
9194: copy_ovector(common, re->top_bracket + 1);
9195: common->quit_label = common->forced_quit_label = LABEL();
9196: if (common->quit != NULL)
9197: set_jumps(common->quit, common->quit_label);
9198: if (common->forced_quit != NULL)
9199: set_jumps(common->forced_quit, common->forced_quit_label);
9200: if (minlength_check_failed != NULL)
9201: SET_LABEL(minlength_check_failed, common->forced_quit_label);
9202: sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
9203:
9204: if (mode != JIT_COMPILE)
9205: {
9206: common->partialmatchlabel = LABEL();
9207: set_jumps(common->partialmatch, common->partialmatchlabel);
9208: return_with_partial_match(common, common->quit_label);
9209: }
9210:
9211: empty_match_backtrack_label = LABEL();
9212: compile_backtrackingpath(common, rootbacktrack.top);
9213: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9214: {
9215: sljit_free_compiler(compiler);
9216: SLJIT_FREE(common->optimized_cbracket);
9217: SLJIT_FREE(common->private_data_ptrs);
9218: if (common->has_then)
9219: SLJIT_FREE(common->then_offsets);
9220: return;
9221: }
9222:
9223: SLJIT_ASSERT(rootbacktrack.prev == NULL);
9224: reset_match_label = LABEL();
9225:
9226: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9227: {
9228: /* Update hit_start only in the first time. */
9229: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
9230: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
9231: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
9232: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
9233: JUMPHERE(jump);
9234: }
9235:
9236: /* Check we have remaining characters. */
9237: if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
9238: {
9239: SLJIT_ASSERT(common->first_line_end != 0);
9240: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
9241: }
9242:
9243: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
9244:
9245: if ((re->options & PCRE_ANCHORED) == 0)
9246: {
9247: if ((re->options & PCRE_FIRSTLINE) == 0)
9248: CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
9249: else
9250: CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
9251: }
9252:
9253: /* No more remaining characters. */
9254: if (reqbyte_notfound != NULL)
9255: JUMPHERE(reqbyte_notfound);
9256:
9257: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9258: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
9259:
9260: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
9261: JUMPTO(SLJIT_JUMP, common->quit_label);
9262:
9263: flush_stubs(common);
9264:
9265: JUMPHERE(empty_match);
9266: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9267: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9268: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
9269: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9270: CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
9271: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9272: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
9273: JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
9274:
9275: common->currententry = common->entries;
9276: common->local_exit = TRUE;
9277: quit_label = common->quit_label;
9278: while (common->currententry != NULL)
9279: {
9280: /* Might add new entries. */
9281: compile_recurse(common);
9282: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9283: {
9284: sljit_free_compiler(compiler);
9285: SLJIT_FREE(common->optimized_cbracket);
9286: SLJIT_FREE(common->private_data_ptrs);
9287: if (common->has_then)
9288: SLJIT_FREE(common->then_offsets);
9289: return;
9290: }
9291: flush_stubs(common);
9292: common->currententry = common->currententry->next;
9293: }
9294: common->local_exit = FALSE;
9295: common->quit_label = quit_label;
9296:
9297: /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
9298: /* This is a (really) rare case. */
9299: set_jumps(common->stackalloc, LABEL());
9300: /* RETURN_ADDR is not a saved register. */
9301: sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
9302: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
9303: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9304: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9305: OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
9306: OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
9307:
9308: sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
9309: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9310: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9311: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9312: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
9313: OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
9314: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
9315: sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
9316:
9317: /* Allocation failed. */
9318: JUMPHERE(jump);
9319: /* We break the return address cache here, but this is a really rare case. */
9320: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
9321: JUMPTO(SLJIT_JUMP, common->quit_label);
9322:
9323: /* Call limit reached. */
9324: set_jumps(common->calllimit, LABEL());
9325: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
9326: JUMPTO(SLJIT_JUMP, common->quit_label);
9327:
9328: if (common->revertframes != NULL)
9329: {
9330: set_jumps(common->revertframes, LABEL());
9331: do_revertframes(common);
9332: }
9333: if (common->wordboundary != NULL)
9334: {
9335: set_jumps(common->wordboundary, LABEL());
9336: check_wordboundary(common);
9337: }
9338: if (common->anynewline != NULL)
9339: {
9340: set_jumps(common->anynewline, LABEL());
9341: check_anynewline(common);
9342: }
9343: if (common->hspace != NULL)
9344: {
9345: set_jumps(common->hspace, LABEL());
9346: check_hspace(common);
9347: }
9348: if (common->vspace != NULL)
9349: {
9350: set_jumps(common->vspace, LABEL());
9351: check_vspace(common);
9352: }
9353: if (common->casefulcmp != NULL)
9354: {
9355: set_jumps(common->casefulcmp, LABEL());
9356: do_casefulcmp(common);
9357: }
9358: if (common->caselesscmp != NULL)
9359: {
9360: set_jumps(common->caselesscmp, LABEL());
9361: do_caselesscmp(common);
9362: }
9363: if (common->reset_match != NULL)
9364: {
9365: set_jumps(common->reset_match, LABEL());
9366: do_reset_match(common, (re->top_bracket + 1) * 2);
9367: CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
9368: OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9369: JUMPTO(SLJIT_JUMP, reset_match_label);
9370: }
9371: #ifdef SUPPORT_UTF
9372: #ifndef COMPILE_PCRE32
9373: if (common->utfreadchar != NULL)
9374: {
9375: set_jumps(common->utfreadchar, LABEL());
9376: do_utfreadchar(common);
9377: }
9378: #endif /* !COMPILE_PCRE32 */
9379: #ifdef COMPILE_PCRE8
9380: if (common->utfreadtype8 != NULL)
9381: {
9382: set_jumps(common->utfreadtype8, LABEL());
9383: do_utfreadtype8(common);
9384: }
9385: #endif /* COMPILE_PCRE8 */
9386: #endif /* SUPPORT_UTF */
9387: #ifdef SUPPORT_UCP
9388: if (common->getucd != NULL)
9389: {
9390: set_jumps(common->getucd, LABEL());
9391: do_getucd(common);
9392: }
9393: #endif
9394:
9395: SLJIT_FREE(common->optimized_cbracket);
9396: SLJIT_FREE(common->private_data_ptrs);
9397: if (common->has_then)
9398: SLJIT_FREE(common->then_offsets);
9399:
9400: executable_func = sljit_generate_code(compiler);
9401: executable_size = sljit_get_generated_code_size(compiler);
9402: sljit_free_compiler(compiler);
9403: if (executable_func == NULL)
9404: return;
9405:
9406: /* Reuse the function descriptor if possible. */
9407: if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
9408: functions = (executable_functions *)extra->executable_jit;
9409: else
9410: {
9411: /* Note: If your memory-checker has flagged the allocation below as a
9412: * memory leak, it is probably because you either forgot to call
9413: * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
9414: * pcre16_extra) object, or you called said function after having
9415: * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
9416: * of the object. (The function will only free the JIT data if the
9417: * bit remains set, as the bit indicates that the pointer to the data
9418: * is valid.)
9419: */
9420: functions = SLJIT_MALLOC(sizeof(executable_functions));
9421: if (functions == NULL)
9422: {
9423: /* This case is highly unlikely since we just recently
9424: freed a lot of memory. Although not impossible. */
9425: sljit_free_code(executable_func);
9426: return;
9427: }
9428: memset(functions, 0, sizeof(executable_functions));
9429: functions->top_bracket = (re->top_bracket + 1) * 2;
9430: functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
9431: extra->executable_jit = functions;
9432: extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
9433: }
9434:
9435: functions->executable_funcs[mode] = executable_func;
9436: functions->executable_sizes[mode] = executable_size;
9437: }
9438:
9439: static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
9440: {
9441: union {
9442: void* executable_func;
9443: jit_function call_executable_func;
9444: } convert_executable_func;
9445: pcre_uint8 local_space[MACHINE_STACK_SIZE];
9446: struct sljit_stack local_stack;
9447:
9448: local_stack.top = (sljit_sw)&local_space;
9449: local_stack.base = local_stack.top;
9450: local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
9451: local_stack.max_limit = local_stack.limit;
9452: arguments->stack = &local_stack;
9453: convert_executable_func.executable_func = executable_func;
9454: return convert_executable_func.call_executable_func(arguments);
9455: }
9456:
9457: int
9458: PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
9459: int length, int start_offset, int options, int *offsets, int offset_count)
9460: {
9461: executable_functions *functions = (executable_functions *)extra_data->executable_jit;
9462: union {
9463: void* executable_func;
9464: jit_function call_executable_func;
9465: } convert_executable_func;
9466: jit_arguments arguments;
9467: int max_offset_count;
9468: int retval;
9469: int mode = JIT_COMPILE;
9470:
9471: if ((options & PCRE_PARTIAL_HARD) != 0)
9472: mode = JIT_PARTIAL_HARD_COMPILE;
9473: else if ((options & PCRE_PARTIAL_SOFT) != 0)
9474: mode = JIT_PARTIAL_SOFT_COMPILE;
9475:
9476: if (functions->executable_funcs[mode] == NULL)
9477: return PCRE_ERROR_JIT_BADOPTION;
9478:
9479: /* Sanity checks should be handled by pcre_exec. */
9480: arguments.str = subject + start_offset;
9481: arguments.begin = subject;
9482: arguments.end = subject + length;
9483: arguments.mark_ptr = NULL;
9484: /* JIT decreases this value less frequently than the interpreter. */
9485: arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
9486: if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
9487: arguments.limit_match = functions->limit_match;
9488: arguments.notbol = (options & PCRE_NOTBOL) != 0;
9489: arguments.noteol = (options & PCRE_NOTEOL) != 0;
9490: arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
9491: arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
9492: arguments.offsets = offsets;
9493: arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
9494: arguments.real_offset_count = offset_count;
9495:
9496: /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
9497: the output vector for storing captured strings, with the remainder used as
9498: workspace. We don't need the workspace here. For compatibility, we limit the
9499: number of captured strings in the same way as pcre_exec(), so that the user
9500: gets the same result with and without JIT. */
9501:
9502: if (offset_count != 2)
9503: offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
9504: max_offset_count = functions->top_bracket;
9505: if (offset_count > max_offset_count)
9506: offset_count = max_offset_count;
9507: arguments.offset_count = offset_count;
9508:
9509: if (functions->callback)
9510: arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
9511: else
9512: arguments.stack = (struct sljit_stack *)functions->userdata;
9513:
9514: if (arguments.stack == NULL)
9515: retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
9516: else
9517: {
9518: convert_executable_func.executable_func = functions->executable_funcs[mode];
9519: retval = convert_executable_func.call_executable_func(&arguments);
9520: }
9521:
9522: if (retval * 2 > offset_count)
9523: retval = 0;
9524: if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
9525: *(extra_data->mark) = arguments.mark_ptr;
9526:
9527: return retval;
9528: }
9529:
9530: #if defined COMPILE_PCRE8
9531: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
9532: pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
9533: PCRE_SPTR subject, int length, int start_offset, int options,
9534: int *offsets, int offset_count, pcre_jit_stack *stack)
9535: #elif defined COMPILE_PCRE16
9536: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
9537: pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
9538: PCRE_SPTR16 subject, int length, int start_offset, int options,
9539: int *offsets, int offset_count, pcre16_jit_stack *stack)
9540: #elif defined COMPILE_PCRE32
9541: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
9542: pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
9543: PCRE_SPTR32 subject, int length, int start_offset, int options,
9544: int *offsets, int offset_count, pcre32_jit_stack *stack)
9545: #endif
9546: {
9547: pcre_uchar *subject_ptr = (pcre_uchar *)subject;
9548: executable_functions *functions = (executable_functions *)extra_data->executable_jit;
9549: union {
9550: void* executable_func;
9551: jit_function call_executable_func;
9552: } convert_executable_func;
9553: jit_arguments arguments;
9554: int max_offset_count;
9555: int retval;
9556: int mode = JIT_COMPILE;
9557:
9558: SLJIT_UNUSED_ARG(argument_re);
9559:
9560: /* Plausibility checks */
9561: if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
9562:
9563: if ((options & PCRE_PARTIAL_HARD) != 0)
9564: mode = JIT_PARTIAL_HARD_COMPILE;
9565: else if ((options & PCRE_PARTIAL_SOFT) != 0)
9566: mode = JIT_PARTIAL_SOFT_COMPILE;
9567:
9568: if (functions->executable_funcs[mode] == NULL)
9569: return PCRE_ERROR_JIT_BADOPTION;
9570:
9571: /* Sanity checks should be handled by pcre_exec. */
9572: arguments.stack = (struct sljit_stack *)stack;
9573: arguments.str = subject_ptr + start_offset;
9574: arguments.begin = subject_ptr;
9575: arguments.end = subject_ptr + length;
9576: arguments.mark_ptr = NULL;
9577: /* JIT decreases this value less frequently than the interpreter. */
9578: arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
9579: if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
9580: arguments.limit_match = functions->limit_match;
9581: arguments.notbol = (options & PCRE_NOTBOL) != 0;
9582: arguments.noteol = (options & PCRE_NOTEOL) != 0;
9583: arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
9584: arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
9585: arguments.offsets = offsets;
9586: arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
9587: arguments.real_offset_count = offset_count;
9588:
9589: /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
9590: the output vector for storing captured strings, with the remainder used as
9591: workspace. We don't need the workspace here. For compatibility, we limit the
9592: number of captured strings in the same way as pcre_exec(), so that the user
9593: gets the same result with and without JIT. */
9594:
9595: if (offset_count != 2)
9596: offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
9597: max_offset_count = functions->top_bracket;
9598: if (offset_count > max_offset_count)
9599: offset_count = max_offset_count;
9600: arguments.offset_count = offset_count;
9601:
9602: convert_executable_func.executable_func = functions->executable_funcs[mode];
9603: retval = convert_executable_func.call_executable_func(&arguments);
9604:
9605: if (retval * 2 > offset_count)
9606: retval = 0;
9607: if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
9608: *(extra_data->mark) = arguments.mark_ptr;
9609:
9610: return retval;
9611: }
9612:
9613: void
9614: PRIV(jit_free)(void *executable_funcs)
9615: {
9616: int i;
9617: executable_functions *functions = (executable_functions *)executable_funcs;
9618: for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
9619: {
9620: if (functions->executable_funcs[i] != NULL)
9621: sljit_free_code(functions->executable_funcs[i]);
9622: }
9623: SLJIT_FREE(functions);
9624: }
9625:
9626: int
9627: PRIV(jit_get_size)(void *executable_funcs)
9628: {
9629: int i;
9630: sljit_uw size = 0;
9631: sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
9632: for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
9633: size += executable_sizes[i];
9634: return (int)size;
9635: }
9636:
9637: const char*
9638: PRIV(jit_get_target)(void)
9639: {
9640: return sljit_get_platform_name();
9641: }
9642:
9643: #if defined COMPILE_PCRE8
9644: PCRE_EXP_DECL pcre_jit_stack *
9645: pcre_jit_stack_alloc(int startsize, int maxsize)
9646: #elif defined COMPILE_PCRE16
9647: PCRE_EXP_DECL pcre16_jit_stack *
9648: pcre16_jit_stack_alloc(int startsize, int maxsize)
9649: #elif defined COMPILE_PCRE32
9650: PCRE_EXP_DECL pcre32_jit_stack *
9651: pcre32_jit_stack_alloc(int startsize, int maxsize)
9652: #endif
9653: {
9654: if (startsize < 1 || maxsize < 1)
9655: return NULL;
9656: if (startsize > maxsize)
9657: startsize = maxsize;
9658: startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
9659: maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
9660: return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
9661: }
9662:
9663: #if defined COMPILE_PCRE8
9664: PCRE_EXP_DECL void
9665: pcre_jit_stack_free(pcre_jit_stack *stack)
9666: #elif defined COMPILE_PCRE16
9667: PCRE_EXP_DECL void
9668: pcre16_jit_stack_free(pcre16_jit_stack *stack)
9669: #elif defined COMPILE_PCRE32
9670: PCRE_EXP_DECL void
9671: pcre32_jit_stack_free(pcre32_jit_stack *stack)
9672: #endif
9673: {
9674: sljit_free_stack((struct sljit_stack *)stack);
9675: }
9676:
9677: #if defined COMPILE_PCRE8
9678: PCRE_EXP_DECL void
9679: pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
9680: #elif defined COMPILE_PCRE16
9681: PCRE_EXP_DECL void
9682: pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
9683: #elif defined COMPILE_PCRE32
9684: PCRE_EXP_DECL void
9685: pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
9686: #endif
9687: {
9688: executable_functions *functions;
9689: if (extra != NULL &&
9690: (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
9691: extra->executable_jit != NULL)
9692: {
9693: functions = (executable_functions *)extra->executable_jit;
9694: functions->callback = callback;
9695: functions->userdata = userdata;
9696: }
9697: }
9698:
9699: #else /* SUPPORT_JIT */
9700:
9701: /* These are dummy functions to avoid linking errors when JIT support is not
9702: being compiled. */
9703:
9704: #if defined COMPILE_PCRE8
9705: PCRE_EXP_DECL pcre_jit_stack *
9706: pcre_jit_stack_alloc(int startsize, int maxsize)
9707: #elif defined COMPILE_PCRE16
9708: PCRE_EXP_DECL pcre16_jit_stack *
9709: pcre16_jit_stack_alloc(int startsize, int maxsize)
9710: #elif defined COMPILE_PCRE32
9711: PCRE_EXP_DECL pcre32_jit_stack *
9712: pcre32_jit_stack_alloc(int startsize, int maxsize)
9713: #endif
9714: {
9715: (void)startsize;
9716: (void)maxsize;
9717: return NULL;
9718: }
9719:
9720: #if defined COMPILE_PCRE8
9721: PCRE_EXP_DECL void
9722: pcre_jit_stack_free(pcre_jit_stack *stack)
9723: #elif defined COMPILE_PCRE16
9724: PCRE_EXP_DECL void
9725: pcre16_jit_stack_free(pcre16_jit_stack *stack)
9726: #elif defined COMPILE_PCRE32
9727: PCRE_EXP_DECL void
9728: pcre32_jit_stack_free(pcre32_jit_stack *stack)
9729: #endif
9730: {
9731: (void)stack;
9732: }
9733:
9734: #if defined COMPILE_PCRE8
9735: PCRE_EXP_DECL void
9736: pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
9737: #elif defined COMPILE_PCRE16
9738: PCRE_EXP_DECL void
9739: pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
9740: #elif defined COMPILE_PCRE32
9741: PCRE_EXP_DECL void
9742: pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
9743: #endif
9744: {
9745: (void)extra;
9746: (void)callback;
9747: (void)userdata;
9748: }
9749:
9750: #endif
9751:
9752: /* End of pcre_jit_compile.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>