Annotation of embedaddon/pcre/pcre_jit_compile.c, revision 1.1.1.5
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
1.1.1.4 misho 9: Copyright (c) 1997-2013 University of Cambridge
1.1 misho 10:
11: The machine code generator part (this module) was written by Zoltan Herczeg
1.1.1.4 misho 12: Copyright (c) 2010-2013
1.1 misho 13:
14: -----------------------------------------------------------------------------
15: Redistribution and use in source and binary forms, with or without
16: modification, are permitted provided that the following conditions are met:
17:
18: * Redistributions of source code must retain the above copyright notice,
19: this list of conditions and the following disclaimer.
20:
21: * Redistributions in binary form must reproduce the above copyright
22: notice, this list of conditions and the following disclaimer in the
23: documentation and/or other materials provided with the distribution.
24:
25: * Neither the name of the University of Cambridge nor the names of its
26: contributors may be used to endorse or promote products derived from
27: this software without specific prior written permission.
28:
29: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39: POSSIBILITY OF SUCH DAMAGE.
40: -----------------------------------------------------------------------------
41: */
42:
43: #ifdef HAVE_CONFIG_H
44: #include "config.h"
45: #endif
46:
47: #include "pcre_internal.h"
48:
1.1.1.4 misho 49: #if defined SUPPORT_JIT
1.1 misho 50:
51: /* All-in-one: Since we use the JIT compiler only from here,
52: we just include it. This way we don't need to touch the build
53: system files. */
54:
/* Configuration of the embedded sljit compiler. These macros are defined
before sljitLir.c is included below, so they take effect for that code. */
/* Memory management is routed through the library's public malloc/free
hooks (PUBL(malloc) / PUBL(free)). */
1.1.1.2 misho 55: #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56: #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
1.1 misho 57: #define SLJIT_CONFIG_AUTO 1
58: #define SLJIT_CONFIG_STATIC 1
/* Verbose output and debug checks are switched off here. */
59: #define SLJIT_VERBOSE 0
60: #define SLJIT_DEBUG 0
61:
62: #include "sljit/sljitLir.c"
63:
/* Stop the build if sljit reports that the target is not supported. */
64: #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
1.1.1.2 misho 65: #error Unsupported architecture
1.1 misho 66: #endif
67:
1.1.1.4 misho 68: /* Defines for debugging purposes. */
1.1 misho 69:
1.1.1.4 misho 70: /* 1 - Use unoptimized capturing brackets.
71: 2 - Enable capture_last_ptr (includes option 1). */
72: /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73:
74: /* 1 - Always have a control head. */
75: /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76:
77: /* Allocate memory for the regex stack on the real machine stack.
78: Fast, but limited size. */
79: #define MACHINE_STACK_SIZE 32768
80:
81: /* Growth rate for stack allocated by the OS. Should be a multiple
82: of the page size. */
1.1 misho 83: #define STACK_GROWTH_RATE 8192
84:
85: /* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is non-zero; SLJIT_DEBUG is defined as 0
earlier in this file, so the check is off unless that is changed. */
86: #if defined SLJIT_DEBUG && SLJIT_DEBUG
87: #define DESTROY_REGISTERS 1
88: #endif
89:
90: /*
91: Short summary about the backtracking mechanism employed by the jit code generator:
92:
93: The code generator follows the recursive nature of the PERL compatible regular
94: expressions. The basic blocks of regular expressions are condition checkers
95: which execute different commands depending on the result of the condition check.
96: The relationship between the operators can be horizontal (concatenation) and
1.1.1.3 misho 97: vertical (sub-expression) (See struct backtrack_common for more details).
1.1 misho 98:
99: 'ab' - 'a' and 'b' regexps are concatenated
100: 'a+' - 'a' is the sub-expression of the '+' operator
101:
102: The condition checkers are boolean (true/false) checkers. Machine code is generated
103: for the checker itself and for the actions depending on the result of the checker.
1.1.1.4 misho 104: The 'true' case is called the matching path (expected path), and the other is called
1.1.1.3 misho 105: the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
1.1.1.4 misho 106: branches on the matching path.
1.1 misho 107:
108: Greedy star operator (*) :
1.1.1.4 misho 109: Matching path: match happens.
1.1.1.3 misho 110: Backtrack path: match failed.
1.1 misho 111: Non-greedy star operator (*?) :
1.1.1.4 misho 112: Matching path: no need to perform a match.
1.1.1.3 misho 113: Backtrack path: match is required.
1.1 misho 114:
115: The following example shows how the code is generated for a capturing bracket
116: with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117: we have the following regular expression:
118:
119: A(B|C)D
120:
121: The generated code will be the following:
122:
1.1.1.4 misho 123: A matching path
124: '(' matching path (pushing arguments to the stack)
125: B matching path
126: ')' matching path (pushing arguments to the stack)
127: D matching path
1.1 misho 128: return with successful match
129:
1.1.1.3 misho 130: D backtrack path
131: ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132: B backtrack path
1.1 misho 133: C expected path
1.1.1.4 misho 134: jump to D matching path
1.1.1.3 misho 135: C backtrack path
136: A backtrack path
1.1 misho 137:
1.1.1.3 misho 138: Notice that the order of the backtrack code paths is the opposite of the fast
1.1 misho 139: code paths. In this way the topmost value on the stack always belongs
1.1.1.3 misho 140: to the current backtrack code path. The backtrack path must check
1.1 misho 141: whether there is a next alternative. If so, it needs to jump back to
1.1.1.4 misho 142: the matching path eventually. Otherwise it needs to clear out its own stack
1.1.1.3 misho 143: frame and continue the execution on the backtrack code paths.
1.1 misho 144: */
145:
146: /*
147: Saved stack frames:
148:
1.1.1.4 misho 149: Atomic blocks and asserts require reloading the values of private data
150: when the backtrack mechanism is performed. Because of OP_RECURSE, the data
1.1 misho 151: are not necessarily known at compile time, thus we need a dynamic restore
152: mechanism.
153:
154: The stack frames are stored in a chain list, and have the following format:
155: ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156:
1.1.1.4 misho 157: Thus we can restore the private data to a particular point in the stack.
1.1 misho 158: */
159:
160: typedef struct jit_arguments {
161: /* Pointers first. */
162: struct sljit_stack *stack;
1.1.1.2 misho 163: const pcre_uchar *str;
164: const pcre_uchar *begin;
165: const pcre_uchar *end;
1.1 misho 166: int *offsets;
1.1.1.3 misho 167: pcre_uchar *uchar_ptr;
168: pcre_uchar *mark_ptr;
1.1.1.4 misho 169: void *callout_data;
1.1 misho 170: /* Everything else after. */
1.1.1.4 misho 171: pcre_uint32 limit_match;
172: int real_offset_count;
173: int offset_count;
1.1.1.2 misho 174: pcre_uint8 notbol;
175: pcre_uint8 noteol;
176: pcre_uint8 notempty;
177: pcre_uint8 notempty_atstart;
1.1 misho 178: } jit_arguments;
179:
/* Record of generated code: one code pointer and one size per compile mode
(both arrays have JIT_NUMBER_OF_COMPILE_MODES entries), together with a
callback, its user data and two 32-bit values.
NOTE(review): callback/userdata presumably belong to the JIT stack callback
API, and top_bracket/limit_match presumably mirror pattern properties —
confirm against the code that uses this structure. */
1.1.1.3 misho 180: typedef struct executable_functions {
181: void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
1.1.1.2 misho 182: PUBL(jit_callback) callback;
1.1 misho 183: void *userdata;
1.1.1.4 misho 184: pcre_uint32 top_bracket;
185: pcre_uint32 limit_match;
1.1.1.3 misho 186: sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187: } executable_functions;
1.1 misho 188:
/* Node of a singly linked list of sljit jumps. */
189: typedef struct jump_list {
190: struct sljit_jump *jump;
/* Next node of the list. */
191: struct jump_list *next;
192: } jump_list;
193:
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit).
NOTE(review): presumably start enters the out-of-line stub and quit is where
execution resumes — confirm against the code that emits the stubs. */
194: typedef struct stub_list {
195: struct sljit_jump *start;
1.1.1.4 misho 196: struct sljit_label *quit;
1.1 misho 197: struct stub_list *next;
198: } stub_list;
199:
/* Negative sentinel values for frame sizes. The framesize members of the
backtrack structures in this file are documented as "less than 0" when no
frame is needed.
NOTE(review): the exact difference between no_frame and no_stack is not
visible here — confirm against get_framesize. */
1.1.1.4 misho 200: enum frame_types {
201: no_frame = -1,
202: no_stack = -2
203: };
204:
/* Type tags for two kinds of control entries: marks and then-traps.
NOTE(review): presumably stored in the control verb chain referred to by
control_head_ptr — confirm against the users of these values. */
205: enum control_types {
206: type_mark = 0,
207: type_then_trap = 1
208: };
209:
/* Function pointer type using the SLJIT_CALL calling convention: takes a
pointer to a jit_arguments block and returns an int. */
1.1 misho 210: typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211:
212: /* The following structure is the key data type for the recursive
1.1.1.4 misho 213: code generator. It is allocated by compile_matchingpath, and contains
214: the arguments for compile_backtrackingpath. Must be the first member
1.1 misho 215: of its descendants. */
/* Every *_backtrack structure declared after this one embeds it as its
first member, named "common". */
1.1.1.3 misho 216: typedef struct backtrack_common {
1.1 misho 217: /* Concatenation stack. */
1.1.1.3 misho 218: struct backtrack_common *prev;
219: jump_list *nextbacktracks;
1.1 misho 220: /* Internal stack (for component operators). */
1.1.1.3 misho 221: struct backtrack_common *top;
222: jump_list *topbacktracks;
1.1 misho 223: /* Opcode pointer. */
1.1.1.2 misho 224: pcre_uchar *cc;
1.1.1.3 misho 225: } backtrack_common;
1.1 misho 226:
/* Backtrack data for assertions; embeds backtrack_common as its first
member. */
1.1.1.3 misho 227: typedef struct assert_backtrack {
228: backtrack_common common;
/* NOTE(review): presumably the jumps taken when the assertion is used as a
condition and fails — confirm. */
1.1 misho 229: jump_list *condfailed;
1.1.1.4 misho 230: /* Less than 0 if a frame is not needed. */
1.1 misho 231: int framesize;
232: /* Points to our private memory word on the stack. */
1.1.1.4 misho 233: int private_data_ptr;
1.1 misho 234: /* For iterators. */
1.1.1.4 misho 235: struct sljit_label *matchingpath;
1.1.1.3 misho 236: } assert_backtrack;
1.1 misho 237:
/* Backtrack data for bracketed groups; embeds backtrack_common as its
first member. The union "u" holds one of three values; the comments on its
members say which opcodes each one is for. */
1.1.1.3 misho 238: typedef struct bracket_backtrack {
239: backtrack_common common;
1.1 misho 240: /* Where to continue if an alternative is successfully matched. */
1.1.1.4 misho 241: struct sljit_label *alternative_matchingpath;
1.1 misho 242: /* For rmin and rmax iterators. */
1.1.1.4 misho 243: struct sljit_label *recursive_matchingpath;
1.1 misho 244: /* For greedy ? operator. */
1.1.1.4 misho 245: struct sljit_label *zero_matchingpath;
1.1 misho 246: /* Contains the branches of a failed condition. */
247: union {
248: /* Both for OP_COND, OP_SCOND. */
249: jump_list *condfailed;
1.1.1.3 misho 250: assert_backtrack *assert;
1.1.1.4 misho 251: /* For OP_ONCE. Less than 0 if not needed. */
1.1 misho 252: int framesize;
253: } u;
254: /* Points to our private memory word on the stack. */
1.1.1.4 misho 255: int private_data_ptr;
1.1.1.3 misho 256: } bracket_backtrack;
1.1 misho 257:
/* Backtrack data whose name indicates the possessive bracket opcodes
(OP_BRAPOS and relatives — NOTE(review): confirm against its users); embeds
backtrack_common as its first member. */
1.1.1.3 misho 258: typedef struct bracketpos_backtrack {
259: backtrack_common common;
1.1 misho 260: /* Points to our private memory word on the stack. */
1.1.1.4 misho 261: int private_data_ptr;
1.1 misho 262: /* Reverting stack is needed. */
263: int framesize;
264: /* Allocated stack size. */
265: int stacksize;
1.1.1.3 misho 266: } bracketpos_backtrack;
1.1 misho 267:
/* Backtrack data holding a single matching-path label; embeds
backtrack_common as its first member.
NOTE(review): by its name this serves OP_BRAMINZERO — confirm. */
1.1.1.3 misho 268: typedef struct braminzero_backtrack {
269: backtrack_common common;
1.1.1.4 misho 270: struct sljit_label *matchingpath;
1.1.1.3 misho 271: } braminzero_backtrack;
1.1 misho 272:
/* Backtrack data for iterators: records the label where the next iteration
starts. Embeds backtrack_common as its first member. */
1.1.1.3 misho 273: typedef struct iterator_backtrack {
274: backtrack_common common;
1.1 misho 275: /* Next iteration. */
1.1.1.4 misho 276: struct sljit_label *matchingpath;
1.1.1.3 misho 277: } iterator_backtrack;
1.1 misho 278:
/* Node of a singly linked list describing one recursion target: its entry
label, the calls collected before the function exists, and the starting
opcode. */
279: typedef struct recurse_entry {
280: struct recurse_entry *next;
281: /* Contains the function entry. */
282: struct sljit_label *entry;
283: /* Collects the calls until the function is not created. */
284: jump_list *calls;
285: /* Points to the starting opcode. */
1.1.1.4 misho 286: sljit_sw start;
1.1 misho 287: } recurse_entry;
288:
/* Backtrack data for recursion; embeds backtrack_common as its first
member.
NOTE(review): inlined_pattern presumably records that the recursed pattern
was compiled in place rather than called — confirm. */
1.1.1.3 misho 289: typedef struct recurse_backtrack {
290: backtrack_common common;
1.1.1.4 misho 291: BOOL inlined_pattern;
1.1.1.3 misho 292: } recurse_backtrack;
1.1 misho 293:
/* OP_THEN_TRAP is given the value OP_TABLE_LENGTH.
NOTE(review): presumably a pseudo-opcode placed just past the real opcode
range — confirm. */
1.1.1.4 misho 294: #define OP_THEN_TRAP OP_TABLE_LENGTH
295:
/* Backtrack data for a then-trap; embeds backtrack_common as its first
member. */
296: typedef struct then_trap_backtrack {
297: backtrack_common common;
298: /* If then_trap is not NULL, this structure contains the real
299: then_trap for the backtracking path. */
300: struct then_trap_backtrack *then_trap;
301: /* Points to the starting opcode. */
302: sljit_sw start;
303: /* Exit point for the then opcodes of this alternative. */
304: jump_list *quit;
305: /* Frame size of the current alternative. */
306: int framesize;
307: } then_trap_backtrack;
308:
/* Used below to size the digits[] member of compiler_common
(2 + MAX_RANGE_SIZE entries). */
309: #define MAX_RANGE_SIZE 6
310:
/* State shared by the code generator functions, which receive it as their
"common" argument. It groups the sljit compiler handle, the byte code start,
per-opcode side tables, offsets of reserved local words (the *_ptr members
of type int), pattern flags, and the labels and jump lists of the generated
code. Some members exist only for particular build configurations
(SUPPORT_UTF, SUPPORT_UCP, COMPILE_PCRE8, COMPILE_PCRE32). */
1.1 misho 311: typedef struct compiler_common {
1.1.1.4 misho 312: /* The sljit generic compiler. */
1.1 misho 313: struct sljit_compiler *compiler;
1.1.1.4 misho 314: /* First byte code. */
1.1.1.2 misho 315: pcre_uchar *start;
1.1.1.4 misho 316: /* Maps private data offset to each opcode. */
317: sljit_si *private_data_ptrs;
318: /* Tells whether the capturing bracket is optimized. */
319: pcre_uint8 *optimized_cbracket;
320: /* Tells whether the starting offset is a target of then. */
321: pcre_uint8 *then_offsets;
322: /* Current position where a THEN must jump. */
323: then_trap_backtrack *then_trap;
324: /* Starting offset of private data for capturing brackets. */
325: int cbra_ptr;
326: /* Output vector starting point. Must be divisible by 2. */
1.1.1.3 misho 327: int ovector_start;
328: /* Last known position of the requested byte. */
329: int req_char_ptr;
330: /* Head of the last recursion. */
1.1.1.4 misho 331: int recursive_head_ptr;
1.1.1.3 misho 332: /* First inspected character for partial matching. */
333: int start_used_ptr;
334: /* Starting pointer for partial soft matches. */
335: int hit_start;
336: /* End pointer of the first line. */
337: int first_line_end;
338: /* Points to the marked string. */
339: int mark_ptr;
1.1.1.4 misho 340: /* Recursive control verb management chain. */
341: int control_head_ptr;
342: /* Points to the last matched capture block index. */
343: int capture_last_ptr;
344: /* Points to the starting position of the current match. */
345: int start_ptr;
1.1.1.3 misho 346:
1.1.1.4 misho 347: /* Flipped and lower case tables. */
1.1.1.2 misho 348: const pcre_uint8 *fcc;
1.1.1.4 misho 349: sljit_sw lcc;
350: /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
1.1.1.3 misho 351: int mode;
1.1.1.4 misho 352: /* \K is found in the pattern. */
353: BOOL has_set_som;
354: /* (*SKIP:arg) is found in the pattern. */
355: BOOL has_skip_arg;
356: /* (*THEN) is found in the pattern. */
357: BOOL has_then;
358: /* Needs to know the start position anytime. */
359: BOOL needs_start_ptr;
360: /* Currently in recurse or negative assert. */
361: BOOL local_exit;
362: /* Currently in a positive assert. */
363: BOOL positive_assert;
364: /* Newline control. */
1.1 misho 365: int nltype;
366: int newline;
367: int bsr_nltype;
1.1.1.4 misho 368: /* Dollar endonly. */
1.1 misho 369: int endonly;
1.1.1.4 misho 370: /* Tables. */
371: sljit_sw ctypes;
372: int digits[2 + MAX_RANGE_SIZE];
373: /* Named capturing brackets. */
1.1.1.5 ! misho 374: pcre_uchar *name_table;
1.1.1.4 misho 375: sljit_sw name_count;
376: sljit_sw name_entry_size;
1.1.1.3 misho 377:
378: /* Labels and jump lists. */
379: struct sljit_label *partialmatchlabel;
1.1.1.4 misho 380: struct sljit_label *quit_label;
381: struct sljit_label *forced_quit_label;
382: struct sljit_label *accept_label;
1.1 misho 383: stub_list *stubs;
384: recurse_entry *entries;
385: recurse_entry *currententry;
1.1.1.3 misho 386: jump_list *partialmatch;
1.1.1.4 misho 387: jump_list *quit;
388: jump_list *positive_assert_quit;
389: jump_list *forced_quit;
1.1 misho 390: jump_list *accept;
391: jump_list *calllimit;
392: jump_list *stackalloc;
393: jump_list *revertframes;
394: jump_list *wordboundary;
395: jump_list *anynewline;
396: jump_list *hspace;
397: jump_list *vspace;
398: jump_list *casefulcmp;
399: jump_list *caselesscmp;
1.1.1.4 misho 400: jump_list *reset_match;
1.1 misho 401: BOOL jscript_compat;
1.1.1.2 misho 402: #ifdef SUPPORT_UTF
403: BOOL utf;
1.1 misho 404: #ifdef SUPPORT_UCP
1.1.1.2 misho 405: BOOL use_ucp;
1.1 misho 406: #endif
1.1.1.4 misho 407: #ifndef COMPILE_PCRE32
1.1.1.2 misho 408: jump_list *utfreadchar;
1.1.1.4 misho 409: #endif
1.1.1.2 misho 410: #ifdef COMPILE_PCRE8
411: jump_list *utfreadtype8;
1.1 misho 412: #endif
1.1.1.2 misho 413: #endif /* SUPPORT_UTF */
1.1 misho 414: #ifdef SUPPORT_UCP
415: jump_list *getucd;
416: #endif
417: } compiler_common;
418:
419: /* For byte_sequence_compare. */
420:
/* length and sourcereg are always present. The remaining members exist
only when SLJIT_UNALIGNED is defined and non-zero. The two unions, c and oc,
have the same layout: each overlays an int, an unsigned short and an array
of character units (4 x 8-bit, 2 x 16-bit or 1 x 32-bit, depending on the
compile mode), so the same storage can be read at different widths.
NOTE(review): presumably c holds the characters to compare and oc an
accompanying mask for caseless comparison — confirm against
byte_sequence_compare. */
421: typedef struct compare_context {
422: int length;
423: int sourcereg;
424: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
1.1.1.2 misho 425: int ucharptr;
1.1 misho 426: union {
1.1.1.4 misho 427: sljit_si asint;
1.1.1.2 misho 428: sljit_uh asushort;
1.1.1.4 misho 429: #if defined COMPILE_PCRE8
1.1 misho 430: sljit_ub asbyte;
1.1.1.2 misho 431: sljit_ub asuchars[4];
1.1.1.4 misho 432: #elif defined COMPILE_PCRE16
1.1.1.2 misho 433: sljit_uh asuchars[2];
1.1.1.4 misho 434: #elif defined COMPILE_PCRE32
435: sljit_ui asuchars[1];
1.1.1.2 misho 436: #endif
1.1 misho 437: } c;
438: union {
1.1.1.4 misho 439: sljit_si asint;
1.1.1.2 misho 440: sljit_uh asushort;
1.1.1.4 misho 441: #if defined COMPILE_PCRE8
1.1 misho 442: sljit_ub asbyte;
1.1.1.2 misho 443: sljit_ub asuchars[4];
1.1.1.4 misho 444: #elif defined COMPILE_PCRE16
1.1.1.2 misho 445: sljit_uh asuchars[2];
1.1.1.4 misho 446: #elif defined COMPILE_PCRE32
447: sljit_ui asuchars[1];
1.1.1.2 misho 448: #endif
1.1 misho 449: } oc;
450: #endif
451: } compare_context;
452:
1.1.1.2 misho 453: /* Undefine sljit macros. */
/* CMP is removed here because this file defines its own CMP shortcut
further down. */
454: #undef CMP
455:
1.1 misho 456: /* Used for accessing the elements of the stack. */
/* Yields a negative byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is
-2 * sizeof(sljit_sw), and so on. */
1.1.1.4 misho 457: #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
1.1 misho 458:
/* Symbolic names for the sljit registers used by the code generator. */
1.1.1.4 misho 459: #define TMP1 SLJIT_SCRATCH_REG1
460: #define TMP2 SLJIT_SCRATCH_REG3
1.1 misho 461: #define TMP3 SLJIT_TEMPORARY_EREG2
1.1.1.2 misho 462: #define STR_PTR SLJIT_SAVED_REG1
463: #define STR_END SLJIT_SAVED_REG2
1.1.1.4 misho 464: #define STACK_TOP SLJIT_SCRATCH_REG2
1.1.1.2 misho 465: #define STACK_LIMIT SLJIT_SAVED_REG3
466: #define ARGUMENTS SLJIT_SAVED_EREG1
1.1.1.4 misho 467: #define COUNT_MATCH SLJIT_SAVED_EREG2
1.1 misho 468: #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469:
1.1.1.4 misho 470: /* Local space layout. */
/* The first five machine words have fixed byte offsets (0 to 4 times
sizeof(sljit_sw)). */
1.1 misho 471: /* These two locals can be used by the current opcode. */
1.1.1.4 misho 472: #define LOCALS0 (0 * sizeof(sljit_sw))
473: #define LOCALS1 (1 * sizeof(sljit_sw))
1.1 misho 474: /* Two local variables for possessive quantifiers (char1 cannot use them). */
1.1.1.4 misho 475: #define POSSESSIVE0 (2 * sizeof(sljit_sw))
476: #define POSSESSIVE1 (3 * sizeof(sljit_sw))
1.1 misho 477: /* Max limit of recursions. */
1.1.1.4 misho 478: #define LIMIT_MATCH (4 * sizeof(sljit_sw))
1.1 misho 479: /* The output vector is stored on the stack, and contains pointers
480: to characters. The vector data is divided into two groups: the first
481: group contains the start / end character pointers, and the second is
482: the start pointers when the end of the capturing group has not yet reached. */
/* The macros below expand to expressions that use a variable named "common"
(a compiler_common pointer) from the enclosing scope. OVECTOR(i) and
OVECTOR_PRIV(i) give the byte offset of the i-th word counted from
common->ovector_start and common->cbra_ptr respectively. PRIVATE_DATA(cc)
reads the private_data_ptrs entry indexed by the distance of cc from
common->start. */
1.1.1.3 misho 483: #define OVECTOR_START (common->ovector_start)
1.1.1.5 ! misho 484: #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
! 485: #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
1.1.1.4 misho 486: #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
1.1.1.2 misho 487:
1.1.1.2 misho 487:
/* Select the sljit move opcodes that match the character unit width of the
library being built: UB for the 8-bit, UH for the 16-bit and UI for the
32-bit build. Any other configuration is rejected at compile time. */
1.1.1.4 misho 488: #if defined COMPILE_PCRE8
1.1.1.2 misho 489: #define MOV_UCHAR SLJIT_MOV_UB
490: #define MOVU_UCHAR SLJIT_MOVU_UB
1.1.1.4 misho 491: #elif defined COMPILE_PCRE16
1.1.1.2 misho 492: #define MOV_UCHAR SLJIT_MOV_UH
493: #define MOVU_UCHAR SLJIT_MOVU_UH
1.1.1.4 misho 494: #elif defined COMPILE_PCRE32
495: #define MOV_UCHAR SLJIT_MOV_UI
496: #define MOVU_UCHAR SLJIT_MOVU_UI
1.1.1.2 misho 497: #else
498: #error Unsupported compiling mode
499: #endif
1.1 misho 500:
501: /* Shortcuts. */
502: #define DEFINE_COMPILER \
503: struct sljit_compiler *compiler = common->compiler
504: #define OP1(op, dst, dstw, src, srcw) \
505: sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
506: #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
507: sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
508: #define LABEL() \
509: sljit_emit_label(compiler)
510: #define JUMP(type) \
511: sljit_emit_jump(compiler, (type))
512: #define JUMPTO(type, label) \
513: sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
514: #define JUMPHERE(jump) \
515: sljit_set_label((jump), sljit_emit_label(compiler))
1.1.1.4 misho 516: #define SET_LABEL(jump, label) \
517: sljit_set_label((jump), (label))
1.1 misho 518: #define CMP(type, src1, src1w, src2, src2w) \
519: sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
520: #define CMPTO(type, src1, src1w, src2, src2w, label) \
521: sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
1.1.1.4 misho 522: #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
523: sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
1.1.1.3 misho 524: #define GET_LOCAL_BASE(dst, dstw, offset) \
525: sljit_get_local_base(compiler, (dst), (dstw), (offset))
1.1 misho 526:
1.1.1.2 misho 527: static pcre_uchar* bracketend(pcre_uchar* cc)
1.1 misho 528: {
529: SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530: do cc += GET(cc, 1); while (*cc == OP_ALT);
531: SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532: cc += 1 + LINK_SIZE;
533: return cc;
534: }
535:
536: /* Functions which might need modification for every newly supported opcode:
537: next_opcode
1.1.1.4 misho 538: check_opcode_types
539: set_private_data_ptrs
1.1 misho 540: get_framesize
541: init_frame
1.1.1.4 misho 542: get_private_data_copy_length
543: copy_private_data
544: compile_matchingpath
545: compile_backtrackingpath
1.1 misho 546: */
547:
/* Returns a pointer to the opcode that follows the one at cc, or NULL when
the opcode cannot be handled (OP_ANYBYTE in UTF mode, or an opcode that is
not listed here). common is only read for its utf flag, in builds with
SUPPORT_UTF. */
1.1.1.2 misho 548: static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
1.1 misho 549: {
550: SLJIT_UNUSED_ARG(common);
551: switch(*cc)
552: {
/* Opcodes whose length is taken directly from the OP_lengths table. */
553: case OP_SOD:
554: case OP_SOM:
555: case OP_SET_SOM:
556: case OP_NOT_WORD_BOUNDARY:
557: case OP_WORD_BOUNDARY:
558: case OP_NOT_DIGIT:
559: case OP_DIGIT:
560: case OP_NOT_WHITESPACE:
561: case OP_WHITESPACE:
562: case OP_NOT_WORDCHAR:
563: case OP_WORDCHAR:
564: case OP_ANY:
565: case OP_ALLANY:
1.1.1.4 misho 566: case OP_NOTPROP:
567: case OP_PROP:
1.1 misho 568: case OP_ANYNL:
569: case OP_NOT_HSPACE:
570: case OP_HSPACE:
571: case OP_NOT_VSPACE:
572: case OP_VSPACE:
573: case OP_EXTUNI:
574: case OP_EODN:
575: case OP_EOD:
576: case OP_CIRC:
577: case OP_CIRCM:
578: case OP_DOLL:
579: case OP_DOLLM:
580: case OP_CRSTAR:
581: case OP_CRMINSTAR:
582: case OP_CRPLUS:
583: case OP_CRMINPLUS:
584: case OP_CRQUERY:
585: case OP_CRMINQUERY:
1.1.1.4 misho 586: case OP_CRRANGE:
587: case OP_CRMINRANGE:
1.1.1.5 ! misho 588: case OP_CRPOSSTAR:
! 589: case OP_CRPOSPLUS:
! 590: case OP_CRPOSQUERY:
! 591: case OP_CRPOSRANGE:
1.1.1.4 misho 592: case OP_CLASS:
593: case OP_NCLASS:
594: case OP_REF:
595: case OP_REFI:
1.1.1.5 ! misho 596: case OP_DNREF:
! 597: case OP_DNREFI:
1.1.1.4 misho 598: case OP_RECURSE:
599: case OP_CALLOUT:
600: case OP_ALT:
601: case OP_KET:
602: case OP_KETRMAX:
603: case OP_KETRMIN:
604: case OP_KETRPOS:
605: case OP_REVERSE:
606: case OP_ASSERT:
607: case OP_ASSERT_NOT:
608: case OP_ASSERTBACK:
609: case OP_ASSERTBACK_NOT:
610: case OP_ONCE:
611: case OP_ONCE_NC:
612: case OP_BRA:
613: case OP_BRAPOS:
614: case OP_CBRA:
615: case OP_CBRAPOS:
616: case OP_COND:
617: case OP_SBRA:
618: case OP_SBRAPOS:
619: case OP_SCBRA:
620: case OP_SCBRAPOS:
621: case OP_SCOND:
622: case OP_CREF:
1.1.1.5 ! misho 623: case OP_DNCREF:
1.1.1.4 misho 624: case OP_RREF:
1.1.1.5 ! misho 625: case OP_DNRREF:
1.1 misho 626: case OP_DEF:
627: case OP_BRAZERO:
628: case OP_BRAMINZERO:
629: case OP_BRAPOSZERO:
1.1.1.4 misho 630: case OP_PRUNE:
631: case OP_SKIP:
632: case OP_THEN:
1.1.1.3 misho 633: case OP_COMMIT:
1.1 misho 634: case OP_FAIL:
635: case OP_ACCEPT:
636: case OP_ASSERT_ACCEPT:
1.1.1.4 misho 637: case OP_CLOSE:
1.1 misho 638: case OP_SKIPZERO:
1.1.1.4 misho 639: return cc + PRIV(OP_lengths)[*cc];
1.1 misho 640:
/* Opcodes that end with a character. The table length is added first; in
UTF mode the code unit just before the new position (cc[-1]) is then tested
and any extra length it announces is skipped as well. */
641: case OP_CHAR:
642: case OP_CHARI:
643: case OP_NOT:
644: case OP_NOTI:
645: case OP_STAR:
646: case OP_MINSTAR:
647: case OP_PLUS:
648: case OP_MINPLUS:
649: case OP_QUERY:
650: case OP_MINQUERY:
1.1.1.4 misho 651: case OP_UPTO:
652: case OP_MINUPTO:
653: case OP_EXACT:
1.1 misho 654: case OP_POSSTAR:
655: case OP_POSPLUS:
656: case OP_POSQUERY:
1.1.1.4 misho 657: case OP_POSUPTO:
1.1 misho 658: case OP_STARI:
659: case OP_MINSTARI:
660: case OP_PLUSI:
661: case OP_MINPLUSI:
662: case OP_QUERYI:
663: case OP_MINQUERYI:
1.1.1.4 misho 664: case OP_UPTOI:
665: case OP_MINUPTOI:
666: case OP_EXACTI:
1.1 misho 667: case OP_POSSTARI:
668: case OP_POSPLUSI:
669: case OP_POSQUERYI:
1.1.1.4 misho 670: case OP_POSUPTOI:
1.1 misho 671: case OP_NOTSTAR:
672: case OP_NOTMINSTAR:
673: case OP_NOTPLUS:
674: case OP_NOTMINPLUS:
675: case OP_NOTQUERY:
676: case OP_NOTMINQUERY:
1.1.1.4 misho 677: case OP_NOTUPTO:
678: case OP_NOTMINUPTO:
679: case OP_NOTEXACT:
1.1 misho 680: case OP_NOTPOSSTAR:
681: case OP_NOTPOSPLUS:
682: case OP_NOTPOSQUERY:
1.1.1.4 misho 683: case OP_NOTPOSUPTO:
1.1 misho 684: case OP_NOTSTARI:
685: case OP_NOTMINSTARI:
686: case OP_NOTPLUSI:
687: case OP_NOTMINPLUSI:
688: case OP_NOTQUERYI:
689: case OP_NOTMINQUERYI:
690: case OP_NOTUPTOI:
691: case OP_NOTMINUPTOI:
692: case OP_NOTEXACTI:
1.1.1.4 misho 693: case OP_NOTPOSSTARI:
694: case OP_NOTPOSPLUSI:
695: case OP_NOTPOSQUERYI:
1.1 misho 696: case OP_NOTPOSUPTOI:
1.1.1.4 misho 697: cc += PRIV(OP_lengths)[*cc];
1.1.1.2 misho 698: #ifdef SUPPORT_UTF
699: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1.1 misho 700: #endif
701: return cc;
702:
1.1.1.4 misho 703: /* Special cases. */
/* Type repeats advance by one unit less than the table length.
NOTE(review): presumably so that the type opcode ending the repeat is
visited as an opcode of its own — confirm. */
704: case OP_TYPESTAR:
705: case OP_TYPEMINSTAR:
706: case OP_TYPEPLUS:
707: case OP_TYPEMINPLUS:
708: case OP_TYPEQUERY:
709: case OP_TYPEMINQUERY:
1.1 misho 710: case OP_TYPEUPTO:
711: case OP_TYPEMINUPTO:
712: case OP_TYPEEXACT:
1.1.1.4 misho 713: case OP_TYPEPOSSTAR:
714: case OP_TYPEPOSPLUS:
715: case OP_TYPEPOSQUERY:
1.1 misho 716: case OP_TYPEPOSUPTO:
1.1.1.4 misho 717: return cc + PRIV(OP_lengths)[*cc] - 1;
1.1 misho 718:
/* OP_ANYBYTE is rejected (NULL) in UTF mode; otherwise it is one unit long. */
1.1.1.4 misho 719: case OP_ANYBYTE:
720: #ifdef SUPPORT_UTF
721: if (common->utf) return NULL;
722: #endif
723: return cc + 1;
1.1 misho 724:
/* The length of an extended class is stored in the byte code at offset 1. */
1.1.1.2 misho 725: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 726: case OP_XCLASS:
727: return cc + GET(cc, 1);
728: #endif
729:
/* Verbs with an argument: cc[1] units are skipped in addition to three
fixed units.
NOTE(review): presumably the opcode, the length unit and a terminator
around a name of cc[1] units — confirm. */
1.1.1.3 misho 730: case OP_MARK:
1.1.1.4 misho 731: case OP_PRUNE_ARG:
732: case OP_SKIP_ARG:
733: case OP_THEN_ARG:
1.1.1.3 misho 734: return cc + 1 + 2 + cc[1];
735:
1.1 misho 736: default:
1.1.1.4 misho 737: /* All opcodes are supported now! */
738: SLJIT_ASSERT_STOP();
1.1 misho 739: return NULL;
740: }
741: }
742:
1.1.1.4 misho 743: static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1.1 misho 744: {
1.1.1.5 ! misho 745: int count;
! 746: pcre_uchar *slot;
1.1.1.4 misho 747:
1.1 misho 748: /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
749: while (cc < ccend)
750: {
751: switch(*cc)
752: {
1.1.1.3 misho 753: case OP_SET_SOM:
754: common->has_set_som = TRUE;
755: cc += 1;
756: break;
757:
1.1.1.4 misho 758: case OP_REF:
759: case OP_REFI:
760: common->optimized_cbracket[GET2(cc, 1)] = 0;
761: cc += 1 + IMM2_SIZE;
1.1 misho 762: break;
763:
764: case OP_CBRAPOS:
765: case OP_SCBRAPOS:
1.1.1.4 misho 766: common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1.1.1.2 misho 767: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 768: break;
769:
770: case OP_COND:
1.1.1.4 misho 771: case OP_SCOND:
772: /* Only AUTO_CALLOUT can insert this opcode. We do
773: not intend to support this case. */
774: if (cc[1 + LINK_SIZE] == OP_CALLOUT)
775: return FALSE;
1.1 misho 776: cc += 1 + LINK_SIZE;
777: break;
778:
1.1.1.4 misho 779: case OP_CREF:
1.1.1.5 ! misho 780: common->optimized_cbracket[GET2(cc, 1)] = 0;
1.1.1.4 misho 781: cc += 1 + IMM2_SIZE;
782: break;
783:
1.1.1.5 ! misho 784: case OP_DNREF:
! 785: case OP_DNREFI:
! 786: case OP_DNCREF:
! 787: count = GET2(cc, 1 + IMM2_SIZE);
! 788: slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
! 789: while (count-- > 0)
! 790: {
! 791: common->optimized_cbracket[GET2(slot, 0)] = 0;
! 792: slot += common->name_entry_size;
1.1.1.4 misho 793: }
1.1.1.5 ! misho 794: cc += 1 + 2 * IMM2_SIZE;
1.1.1.4 misho 795: break;
796:
1.1.1.3 misho 797: case OP_RECURSE:
798: /* Set its value only once. */
1.1.1.4 misho 799: if (common->recursive_head_ptr == 0)
1.1.1.3 misho 800: {
1.1.1.4 misho 801: common->recursive_head_ptr = common->ovector_start;
802: common->ovector_start += sizeof(sljit_sw);
1.1.1.3 misho 803: }
804: cc += 1 + LINK_SIZE;
805: break;
806:
1.1.1.4 misho 807: case OP_CALLOUT:
808: if (common->capture_last_ptr == 0)
809: {
810: common->capture_last_ptr = common->ovector_start;
811: common->ovector_start += sizeof(sljit_sw);
812: }
813: cc += 2 + 2 * LINK_SIZE;
814: break;
815:
816: case OP_THEN_ARG:
817: common->has_then = TRUE;
818: common->control_head_ptr = 1;
819: /* Fall through. */
820:
821: case OP_PRUNE_ARG:
822: common->needs_start_ptr = TRUE;
823: /* Fall through. */
824:
1.1.1.3 misho 825: case OP_MARK:
826: if (common->mark_ptr == 0)
827: {
828: common->mark_ptr = common->ovector_start;
1.1.1.4 misho 829: common->ovector_start += sizeof(sljit_sw);
1.1.1.3 misho 830: }
831: cc += 1 + 2 + cc[1];
832: break;
833:
1.1.1.4 misho 834: case OP_THEN:
835: common->has_then = TRUE;
836: common->control_head_ptr = 1;
837: /* Fall through. */
838:
839: case OP_PRUNE:
840: case OP_SKIP:
841: common->needs_start_ptr = TRUE;
842: cc += 1;
843: break;
844:
845: case OP_SKIP_ARG:
846: common->control_head_ptr = 1;
847: common->has_skip_arg = TRUE;
848: cc += 1 + 2 + cc[1];
849: break;
850:
1.1 misho 851: default:
852: cc = next_opcode(common, cc);
853: if (cc == NULL)
1.1.1.4 misho 854: return FALSE;
1.1 misho 855: break;
856: }
857: }
1.1.1.4 misho 858: return TRUE;
1.1 misho 859: }
860:
1.1.1.4 misho 861: static int get_class_iterator_size(pcre_uchar *cc)
1.1 misho 862: {
1.1.1.4 misho 863: switch(*cc)
1.1 misho 864: {
1.1.1.4 misho 865: case OP_CRSTAR:
866: case OP_CRPLUS:
867: return 2;
1.1 misho 868:
1.1.1.4 misho 869: case OP_CRMINSTAR:
870: case OP_CRMINPLUS:
871: case OP_CRQUERY:
872: case OP_CRMINQUERY:
873: return 1;
1.1 misho 874:
1.1.1.4 misho 875: case OP_CRRANGE:
876: case OP_CRMINRANGE:
877: if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
878: return 0;
879: return 2;
1.1 misho 880:
1.1.1.4 misho 881: default:
882: return 0;
883: }
884: }
885:
/* Looks for a bracket that the byte code repeats literally: the group
starting at begin, followed by byte-identical copies of itself and,
optionally, by a nested chain of BRAZERO / BRAMINZERO copies. When a repeat
is found, three consecutive private_data_ptrs entries are written, starting
at the index of the link field of a closing KET: a length in code units,
the repeat kind (OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.
Returns TRUE when a repeat is recorded or was recorded earlier, FALSE
otherwise. */
886: static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
887: {
888: pcre_uchar *end = bracketend(begin);
889: pcre_uchar *next;
890: pcre_uchar *next_end;
891: pcre_uchar *max_end;
892: pcre_uchar type;
893: sljit_sw length = end - begin;
894: int min, max, i;
895:
896: /* Detect fixed iterations first. */
/* Only groups closed by a plain OP_KET are considered. */
897: if (end[-(1 + LINK_SIZE)] != OP_KET)
898: return FALSE;
899:
900: /* Already detected repeat. */
901: if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
902: return TRUE;
903:
/* Count the byte-identical copies that directly follow the group; min
includes the original. */
904: next = end;
905: min = 1;
906: while (1)
907: {
908: if (*next != *begin)
909: break;
910: next_end = bracketend(next);
911: if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
912: break;
1.1.1.4 misho 913: next = next_end;
914: min++;
915: }
916:
/* Exactly two copies are not treated as a repeat. */
917: if (min == 2)
918: return FALSE;
919:
/* Look for an optional part: a chain of "type OP_BRA <copy>" prefixes,
where type is OP_BRAZERO or OP_BRAMINZERO. max counts the prefixes found. */
920: max = 0;
921: max_end = next;
922: if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
923: {
924: type = *next;
925: while (1)
926: {
927: if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
928: break;
929: next_end = bracketend(next + 2 + LINK_SIZE);
930: if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
931: break;
932: next = next_end;
933: max++;
934: }
935:
/* The chain must end with "type <copy>" without the OP_BRA wrapper,
followed by one OP_KET for each OP_BRA opened above. */
936: if (next[0] == type && next[1] == *begin && max >= 1)
937: {
938: next_end = bracketend(next + 1);
939: if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
940: {
941: for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
942: if (*next_end != OP_KET)
943: break;
944:
945: if (i == max)
946: {
/* Record the optional part at the KET that ends just before max_end:
its length, its kind and its count. */
947: common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
948: common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
949: /* +2 the original and the last. */
950: common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
951: if (min == 1)
952: return TRUE;
/* The last fixed copy now belongs to the optional part: take it out of
the fixed count and move max_end back to where that copy starts. */
953: min--;
954: max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
955: }
956: }
1.1 misho 957: }
958: }
1.1.1.4 misho 959:
/* Record the fixed part at the KET of the first group: the length of the
copies that follow it, OP_EXACT and the count. */
960: if (min >= 3)
961: {
962: common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
963: common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
964: common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
965: return TRUE;
966: }
967:
968: return FALSE;
969: }
970:
/* Groups of case labels shared by set_private_data_ptrs,
   get_private_data_copy_length and copy_private_data. Each macro expands to
   a list of "case OP_xxx:" labels and must be followed by the statements
   handling that group. */

/* Single character iterators for which set_private_data_ptrs reserves one
   word; the opcode is followed by one character. */
971: #define CASE_ITERATOR_PRIVATE_DATA_1 \
972: case OP_MINSTAR: \
973: case OP_MINPLUS: \
974: case OP_QUERY: \
975: case OP_MINQUERY: \
976: case OP_MINSTARI: \
977: case OP_MINPLUSI: \
978: case OP_QUERYI: \
979: case OP_MINQUERYI: \
980: case OP_NOTMINSTAR: \
981: case OP_NOTMINPLUS: \
982: case OP_NOTQUERY: \
983: case OP_NOTMINQUERY: \
984: case OP_NOTMINSTARI: \
985: case OP_NOTMINPLUSI: \
986: case OP_NOTQUERYI: \
987: case OP_NOTMINQUERYI:
988:
/* Single character iterators for which two words are reserved; the opcode
   is followed by one character. */
989: #define CASE_ITERATOR_PRIVATE_DATA_2A \
990: case OP_STAR: \
991: case OP_PLUS: \
992: case OP_STARI: \
993: case OP_PLUSI: \
994: case OP_NOTSTAR: \
995: case OP_NOTPLUS: \
996: case OP_NOTSTARI: \
997: case OP_NOTPLUSI:
998:
/* Single character iterators for which two words are reserved; the opcode
   is followed by an IMM2_SIZE count and then one character. */
999: #define CASE_ITERATOR_PRIVATE_DATA_2B \
1000: case OP_UPTO: \
1001: case OP_MINUPTO: \
1002: case OP_UPTOI: \
1003: case OP_MINUPTOI: \
1004: case OP_NOTUPTO: \
1005: case OP_NOTMINUPTO: \
1006: case OP_NOTUPTOI: \
1007: case OP_NOTMINUPTOI:
1008:
/* Character type iterators for which one word is reserved. */
1009: #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1010: case OP_TYPEMINSTAR: \
1011: case OP_TYPEMINPLUS: \
1012: case OP_TYPEQUERY: \
1013: case OP_TYPEMINQUERY:
1014:
/* Character type iterators for which two words are reserved, unless the
   type that follows is OP_ANYNL or OP_EXTUNI. */
1015: #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1016: case OP_TYPESTAR: \
1017: case OP_TYPEPLUS:
1018:
/* Same as the 2A group, but an IMM2_SIZE count precedes the type. */
1019: #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1020: case OP_TYPEUPTO: \
1021: case OP_TYPEMINUPTO:
1022:
1022:
1023: static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1024: {
1025: pcre_uchar *cc = common->start;
1026: pcre_uchar *alternative;
1027: pcre_uchar *end = NULL;
1028: int private_data_ptr = *private_data_start;
1029: int space, size, bracketlen;
1030:
1031: while (cc < ccend)
1032: {
1033: space = 0;
1034: size = 0;
1035: bracketlen = 0;
1036: if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1037: return;
1038:
1039: if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1040: if (detect_repeat(common, cc))
1041: {
1042: /* These brackets are converted to repeats, so no global
1043: based single character repeat is allowed. */
1044: if (cc >= end)
1045: end = bracketend(cc);
1046: }
1047:
1048: switch(*cc)
1049: {
1050: case OP_KET:
1051: if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1052: {
1053: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1054: private_data_ptr += sizeof(sljit_sw);
1055: cc += common->private_data_ptrs[cc + 1 - common->start];
1056: }
1057: cc += 1 + LINK_SIZE;
1058: break;
1059:
1060: case OP_ASSERT:
1061: case OP_ASSERT_NOT:
1062: case OP_ASSERTBACK:
1063: case OP_ASSERTBACK_NOT:
1064: case OP_ONCE:
1065: case OP_ONCE_NC:
1066: case OP_BRAPOS:
1067: case OP_SBRA:
1068: case OP_SBRAPOS:
1069: case OP_SCOND:
1070: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1071: private_data_ptr += sizeof(sljit_sw);
1072: bracketlen = 1 + LINK_SIZE;
1073: break;
1074:
1075: case OP_CBRAPOS:
1076: case OP_SCBRAPOS:
1077: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1078: private_data_ptr += sizeof(sljit_sw);
1079: bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1080: break;
1081:
1082: case OP_COND:
1083: /* Might be a hidden SCOND. */
1084: alternative = cc + GET(cc, 1);
1085: if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1086: {
1087: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1088: private_data_ptr += sizeof(sljit_sw);
1089: }
1090: bracketlen = 1 + LINK_SIZE;
1091: break;
1092:
1093: case OP_BRA:
1094: bracketlen = 1 + LINK_SIZE;
1095: break;
1096:
1097: case OP_CBRA:
1098: case OP_SCBRA:
1099: bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1100: break;
1101:
1102: CASE_ITERATOR_PRIVATE_DATA_1
1103: space = 1;
1104: size = -2;
1105: break;
1106:
1107: CASE_ITERATOR_PRIVATE_DATA_2A
1108: space = 2;
1109: size = -2;
1110: break;
1111:
1112: CASE_ITERATOR_PRIVATE_DATA_2B
1113: space = 2;
1114: size = -(2 + IMM2_SIZE);
1115: break;
1116:
1117: CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1118: space = 1;
1119: size = 1;
1120: break;
1121:
1122: CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1123: if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1124: space = 2;
1125: size = 1;
1126: break;
1127:
1128: CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1129: if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1130: space = 2;
1131: size = 1 + IMM2_SIZE;
1132: break;
1133:
1134: case OP_CLASS:
1135: case OP_NCLASS:
1136: size += 1 + 32 / sizeof(pcre_uchar);
1137: space = get_class_iterator_size(cc + size);
1138: break;
1139:
1140: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1141: case OP_XCLASS:
1142: size = GET(cc, 1);
1143: space = get_class_iterator_size(cc + size);
1144: break;
1145: #endif
1146:
1147: default:
1148: cc = next_opcode(common, cc);
1149: SLJIT_ASSERT(cc != NULL);
1150: break;
1151: }
1152:
1153: /* Character iterators, which are not inside a repeated bracket,
1154: gets a private slot instead of allocating it on the stack. */
1155: if (space > 0 && cc >= end)
1156: {
1157: common->private_data_ptrs[cc - common->start] = private_data_ptr;
1158: private_data_ptr += sizeof(sljit_sw) * space;
1159: }
1160:
1161: if (size != 0)
1162: {
1163: if (size < 0)
1164: {
1165: cc += -size;
1166: #ifdef SUPPORT_UTF
1167: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1168: #endif
1169: }
1170: else
1171: cc += size;
1172: }
1173:
1174: if (bracketlen > 0)
1175: {
1176: if (cc >= end)
1177: {
1178: end = bracketend(cc);
1179: if (end[-1 - LINK_SIZE] == OP_KET)
1180: end = NULL;
1181: }
1182: cc += bracketlen;
1183: }
1184: }
1185: *private_data_start = private_data_ptr;
1.1 misho 1186: }
1187:
1.1.1.4 misho 1188: /* Returns a frame_types value (always < 0) if no frame is needed. */
/* Computes how many stack words a frame covering the opcodes in
   [cc, ccend) needs.

   Counted items: 2 words for the start of match (OP_SET_SOM), 2 for the
   mark, 2 for the last capture (only when common->capture_last_ptr is set)
   and 3 for every capturing bracket. OP_RECURSE counts each of the 2-word
   items that is enabled in common and not yet counted.

   cc, ccend          range to scan; when ccend is NULL the range is the
                      inside of the bracket at cc
   recursive          when TRUE the start of match and the mark are treated
                      as already counted
   needs_control_head output: set to TRUE when a MARK / PRUNE_ARG / THEN_ARG
                      is found and common->control_head_ptr is non-zero

   Returns length + 1 when something has to be saved, otherwise no_frame
   (when stack_restore was set during the scan) or no_stack. */
1189: static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1.1 misho 1190: {
1191: int length = 0;
1.1.1.4 misho 1192: int possessive = 0;
1193: BOOL stack_restore = FALSE;
1.1.1.3 misho 1194: BOOL setsom_found = recursive;
1195: BOOL setmark_found = recursive;
1.1.1.4 misho 1196: /* The last capture is a local variable even for recursions. */
1197: BOOL capture_last_found = FALSE;
1.1 misho 1198:
1.1.1.4 misho 1199: #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1200: SLJIT_ASSERT(common->control_head_ptr != 0);
1201: *needs_control_head = TRUE;
1202: #else
1203: *needs_control_head = FALSE;
1204: #endif
1205:
1206: if (ccend == NULL)
1.1 misho 1207: {
1.1.1.4 misho 1208: ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive possessive capturing bracket starts with its own 3 words
   (plus 2 for the last capture). This amount is remembered in possessive. */
1209: if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1210: {
1211: possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1212: /* This is correct regardless of common->capture_last_ptr. */
1213: capture_last_found = TRUE;
1214: }
1215: cc = next_opcode(common, cc);
1.1 misho 1216: }
1217:
1218: SLJIT_ASSERT(cc != NULL);
1219: while (cc < ccend)
1220: switch(*cc)
1221: {
1222: case OP_SET_SOM:
1.1.1.3 misho 1223: SLJIT_ASSERT(common->has_set_som);
1.1.1.4 misho 1224: stack_restore = TRUE;
1.1 misho 1225: if (!setsom_found)
1226: {
1227: length += 2;
1228: setsom_found = TRUE;
1229: }
1.1.1.3 misho 1230: cc += 1;
1231: break;
1232:
1233: case OP_MARK:
1.1.1.4 misho 1234: case OP_PRUNE_ARG:
1235: case OP_THEN_ARG:
1.1.1.3 misho 1236: SLJIT_ASSERT(common->mark_ptr != 0);
1.1.1.4 misho 1237: stack_restore = TRUE;
1.1.1.3 misho 1238: if (!setmark_found)
1239: {
1240: length += 2;
1241: setmark_found = TRUE;
1242: }
1.1.1.4 misho 1243: if (common->control_head_ptr != 0)
1244: *needs_control_head = TRUE;
1.1.1.3 misho 1245: cc += 1 + 2 + cc[1];
1246: break;
1247:
1248: case OP_RECURSE:
1.1.1.4 misho 1249: stack_restore = TRUE;
1.1.1.3 misho 1250: if (common->has_set_som && !setsom_found)
1251: {
1252: length += 2;
1253: setsom_found = TRUE;
1254: }
1255: if (common->mark_ptr != 0 && !setmark_found)
1256: {
1257: length += 2;
1258: setmark_found = TRUE;
1259: }
1.1.1.4 misho 1260: if (common->capture_last_ptr != 0 && !capture_last_found)
1261: {
1262: length += 2;
1263: capture_last_found = TRUE;
1264: }
1.1.1.3 misho 1265: cc += 1 + LINK_SIZE;
1.1 misho 1266: break;
1267:
1268: case OP_CBRA:
1269: case OP_CBRAPOS:
1270: case OP_SCBRA:
1271: case OP_SCBRAPOS:
1.1.1.4 misho 1272: stack_restore = TRUE;
1273: if (common->capture_last_ptr != 0 && !capture_last_found)
1274: {
1275: length += 2;
1276: capture_last_found = TRUE;
1277: }
1.1 misho 1278: length += 3;
1.1.1.2 misho 1279: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 1280: break;
1281:
/* Every opcode which is not listed below sets stack_restore. The listed
   opcodes are only stepped over and leave stack_restore unchanged. */
1282: default:
1.1.1.4 misho 1283: stack_restore = TRUE;
1284: /* Fall through. */
1285:
1286: case OP_NOT_WORD_BOUNDARY:
1287: case OP_WORD_BOUNDARY:
1288: case OP_NOT_DIGIT:
1289: case OP_DIGIT:
1290: case OP_NOT_WHITESPACE:
1291: case OP_WHITESPACE:
1292: case OP_NOT_WORDCHAR:
1293: case OP_WORDCHAR:
1294: case OP_ANY:
1295: case OP_ALLANY:
1296: case OP_ANYBYTE:
1297: case OP_NOTPROP:
1298: case OP_PROP:
1299: case OP_ANYNL:
1300: case OP_NOT_HSPACE:
1301: case OP_HSPACE:
1302: case OP_NOT_VSPACE:
1303: case OP_VSPACE:
1304: case OP_EXTUNI:
1305: case OP_EODN:
1306: case OP_EOD:
1307: case OP_CIRC:
1308: case OP_CIRCM:
1309: case OP_DOLL:
1310: case OP_DOLLM:
1311: case OP_CHAR:
1312: case OP_CHARI:
1313: case OP_NOT:
1314: case OP_NOTI:
1315:
1316: case OP_EXACT:
1317: case OP_POSSTAR:
1318: case OP_POSPLUS:
1319: case OP_POSQUERY:
1320: case OP_POSUPTO:
1321:
1322: case OP_EXACTI:
1323: case OP_POSSTARI:
1324: case OP_POSPLUSI:
1325: case OP_POSQUERYI:
1326: case OP_POSUPTOI:
1327:
1328: case OP_NOTEXACT:
1329: case OP_NOTPOSSTAR:
1330: case OP_NOTPOSPLUS:
1331: case OP_NOTPOSQUERY:
1332: case OP_NOTPOSUPTO:
1333:
1334: case OP_NOTEXACTI:
1335: case OP_NOTPOSSTARI:
1336: case OP_NOTPOSPLUSI:
1337: case OP_NOTPOSQUERYI:
1338: case OP_NOTPOSUPTOI:
1339:
1340: case OP_TYPEEXACT:
1341: case OP_TYPEPOSSTAR:
1342: case OP_TYPEPOSPLUS:
1343: case OP_TYPEPOSQUERY:
1344: case OP_TYPEPOSUPTO:
1345:
1346: case OP_CLASS:
1347: case OP_NCLASS:
1348: case OP_XCLASS:
1349:
1.1 misho 1350: cc = next_opcode(common, cc);
1351: SLJIT_ASSERT(cc != NULL);
1352: break;
1353: }
1354:
1355: /* Possessive quantifiers can use a special case. */
/* Nothing was added beyond the bracket's own words, so no frame is
   reported. */
1.1.1.4 misho 1356: if (SLJIT_UNLIKELY(possessive == length))
1357: return stack_restore ? no_frame : no_stack;
1.1 misho 1358:
/* One extra word is added to the counted length; init_frame ends its
   output with a single zero word. */
1359: if (length > 0)
1360: return length + 1;
1.1.1.4 misho 1361: return stack_restore ? no_frame : no_stack;
1.1 misho 1362: }
1363:
/* Emits the code which fills a frame on the stack for the opcodes in
   [cc, ccend). The opcode walk has the same shape as the one in
   get_framesize.

   Records written, starting at STACK(stackpos):
     start of match:    -OVECTOR(0), current value
     mark:              -common->mark_ptr, current value
     last capture:      -common->capture_last_ptr, current value
     capturing bracket: OVECTOR(offset), then both ovector values
   A single zero word ends the frame, and the final position is asserted to
   be STACK(stacktop).

   When ccend is NULL the range is the inside of the bracket at cc, except
   for a non-recursive OP_CBRAPOS / OP_SCBRAPOS, whose own opcode is kept
   in the range so that the bracket itself is recorded as well. */
1.1.1.4 misho 1364: static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1.1 misho 1365: {
1366: DEFINE_COMPILER;
1.1.1.3 misho 1367: BOOL setsom_found = recursive;
1368: BOOL setmark_found = recursive;
1.1.1.4 misho 1369: /* The last capture is a local variable even for recursions. */
1370: BOOL capture_last_found = FALSE;
1.1 misho 1371: int offset;
1372:
1373: /* >= 1 + shortest item size (2) */
1.1.1.2 misho 1374: SLJIT_UNUSED_ARG(stacktop);
1.1 misho 1375: SLJIT_ASSERT(stackpos >= stacktop + 2);
1376:
/* From here on stackpos is an offset produced by STACK(), advanced by
   sizeof(sljit_sw) after every stored word. */
1377: stackpos = STACK(stackpos);
1.1.1.4 misho 1378: if (ccend == NULL)
1379: {
1380: ccend = bracketend(cc) - (1 + LINK_SIZE);
1381: if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1382: cc = next_opcode(common, cc);
1383: }
1384:
1.1 misho 1385: SLJIT_ASSERT(cc != NULL);
1386: while (cc < ccend)
1387: switch(*cc)
1388: {
1389: case OP_SET_SOM:
1.1.1.3 misho 1390: SLJIT_ASSERT(common->has_set_som);
1.1 misho 1391: if (!setsom_found)
1392: {
1393: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1.1.1.4 misho 1394: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1395: stackpos += (int)sizeof(sljit_sw);
1.1 misho 1396: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1.1.1.4 misho 1397: stackpos += (int)sizeof(sljit_sw);
1.1 misho 1398: setsom_found = TRUE;
1399: }
1.1.1.3 misho 1400: cc += 1;
1401: break;
1402:
1403: case OP_MARK:
1.1.1.4 misho 1404: case OP_PRUNE_ARG:
1405: case OP_THEN_ARG:
1.1.1.3 misho 1406: SLJIT_ASSERT(common->mark_ptr != 0);
1407: if (!setmark_found)
1408: {
1409: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1.1.1.4 misho 1410: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1411: stackpos += (int)sizeof(sljit_sw);
1.1.1.3 misho 1412: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1.1.1.4 misho 1413: stackpos += (int)sizeof(sljit_sw);
1.1.1.3 misho 1414: setmark_found = TRUE;
1415: }
1416: cc += 1 + 2 + cc[1];
1417: break;
1418:
/* A recursion records every enabled item which has not been recorded yet. */
1419: case OP_RECURSE:
1420: if (common->has_set_som && !setsom_found)
1421: {
1422: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1.1.1.4 misho 1423: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1424: stackpos += (int)sizeof(sljit_sw);
1.1.1.3 misho 1425: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1.1.1.4 misho 1426: stackpos += (int)sizeof(sljit_sw);
1.1.1.3 misho 1427: setsom_found = TRUE;
1428: }
1429: if (common->mark_ptr != 0 && !setmark_found)
1430: {
1431: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1.1.1.4 misho 1432: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1433: stackpos += (int)sizeof(sljit_sw);
1.1.1.3 misho 1434: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1.1.1.4 misho 1435: stackpos += (int)sizeof(sljit_sw);
1.1.1.3 misho 1436: setmark_found = TRUE;
1437: }
1.1.1.4 misho 1438: if (common->capture_last_ptr != 0 && !capture_last_found)
1439: {
1440: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1441: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1442: stackpos += (int)sizeof(sljit_sw);
1443: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444: stackpos += (int)sizeof(sljit_sw);
1445: capture_last_found = TRUE;
1446: }
1.1.1.3 misho 1447: cc += 1 + LINK_SIZE;
1.1 misho 1448: break;
1449:
1450: case OP_CBRA:
1451: case OP_CBRAPOS:
1452: case OP_SCBRA:
1453: case OP_SCBRAPOS:
1.1.1.4 misho 1454: if (common->capture_last_ptr != 0 && !capture_last_found)
1455: {
1456: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1457: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1458: stackpos += (int)sizeof(sljit_sw);
1459: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1460: stackpos += (int)sizeof(sljit_sw);
1461: capture_last_found = TRUE;
1462: }
/* The bracket number selects a pair of ovector entries; the record is the
   (positive) ovector offset followed by both values. */
1.1 misho 1463: offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1464: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1.1.1.4 misho 1465: stackpos += (int)sizeof(sljit_sw);
1.1 misho 1466: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1467: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1468: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1.1.1.4 misho 1469: stackpos += (int)sizeof(sljit_sw);
1.1 misho 1470: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1.1.1.4 misho 1471: stackpos += (int)sizeof(sljit_sw);
1.1 misho 1472:
1.1.1.2 misho 1473: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 1474: break;
1475:
1476: default:
1477: cc = next_opcode(common, cc);
1478: SLJIT_ASSERT(cc != NULL);
1479: break;
1480: }
1481:
/* End of frame marker. */
1.1.1.4 misho 1482: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1.1 misho 1483: SLJIT_ASSERT(stackpos == STACK(stacktop));
1484: }
1485:
/* Returns the number of machine words needed to hold a copy of the private
   data used by the opcodes in [cc, ccend). The count starts at 2, or at 3
   when needs_control_head is set, and grows by one or two words for every
   opcode which owns private data. copy_private_data walks the same opcodes
   in the same way. NOTE(review): the meaning of the two base words is not
   visible in this function - confirm against the callers. */
1.1.1.4 misho 1486: static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1.1 misho 1487: {
1.1.1.4 misho 1488: int private_data_length = needs_control_head ? 3 : 2;
1489: int size;
1.1.1.2 misho 1490: pcre_uchar *alternative;
1.1.1.4 misho 1491: /* Calculate the sum of the private machine words. */
1.1 misho 1492: while (cc < ccend)
1493: {
1.1.1.4 misho 1494: size = 0;
1.1 misho 1495: switch(*cc)
1496: {
/* A ket owns private data only when PRIVATE_DATA() is non-zero for it. */
1.1.1.4 misho 1497: case OP_KET:
1498: if (PRIVATE_DATA(cc) != 0)
1499: private_data_length++;
1500: cc += 1 + LINK_SIZE;
1501: break;
1502:
1.1 misho 1503: case OP_ASSERT:
1504: case OP_ASSERT_NOT:
1505: case OP_ASSERTBACK:
1506: case OP_ASSERTBACK_NOT:
1507: case OP_ONCE:
1508: case OP_ONCE_NC:
1509: case OP_BRAPOS:
1510: case OP_SBRA:
1511: case OP_SBRAPOS:
1512: case OP_SCOND:
1.1.1.4 misho 1513: private_data_length++;
1.1 misho 1514: cc += 1 + LINK_SIZE;
1515: break;
1516:
/* A capturing bracket is counted only when its optimized_cbracket entry
   is zero. */
1517: case OP_CBRA:
1518: case OP_SCBRA:
1.1.1.4 misho 1519: if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1520: private_data_length++;
1.1.1.2 misho 1521: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 1522: break;
1523:
1524: case OP_CBRAPOS:
1525: case OP_SCBRAPOS:
1.1.1.4 misho 1526: private_data_length += 2;
1.1.1.2 misho 1527: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 1528: break;
1529:
1530: case OP_COND:
1531: /* Might be a hidden SCOND. */
1532: alternative = cc + GET(cc, 1);
1533: if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1.1.1.4 misho 1534: private_data_length++;
1.1 misho 1535: cc += 1 + LINK_SIZE;
1536: break;
1537:
/* Character iterators: counted only when a private slot was assigned. In
   UTF mode the character after the opcode may have extra units. */
1.1.1.4 misho 1538: CASE_ITERATOR_PRIVATE_DATA_1
1539: if (PRIVATE_DATA(cc))
1540: private_data_length++;
1541: cc += 2;
1542: #ifdef SUPPORT_UTF
1543: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1544: #endif
1545: break;
1546:
1547: CASE_ITERATOR_PRIVATE_DATA_2A
1548: if (PRIVATE_DATA(cc))
1549: private_data_length += 2;
1550: cc += 2;
1551: #ifdef SUPPORT_UTF
1552: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1553: #endif
1554: break;
1555:
1556: CASE_ITERATOR_PRIVATE_DATA_2B
1557: if (PRIVATE_DATA(cc))
1558: private_data_length += 2;
1559: cc += 2 + IMM2_SIZE;
1560: #ifdef SUPPORT_UTF
1561: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1562: #endif
1563: break;
1564:
1565: CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1566: if (PRIVATE_DATA(cc))
1567: private_data_length++;
1568: cc += 1;
1569: break;
1570:
1571: CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1572: if (PRIVATE_DATA(cc))
1573: private_data_length += 2;
1574: cc += 1;
1575: break;
1576:
1577: CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1578: if (PRIVATE_DATA(cc))
1579: private_data_length += 2;
1580: cc += 1 + IMM2_SIZE;
1581: break;
1582:
/* Classes: the word count depends on the repeat opcode which follows the
   class data. */
1583: case OP_CLASS:
1584: case OP_NCLASS:
1585: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1586: case OP_XCLASS:
1587: size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1588: #else
1589: size = 1 + 32 / (int)sizeof(pcre_uchar);
1590: #endif
1591: if (PRIVATE_DATA(cc))
1592: private_data_length += get_class_iterator_size(cc + size);
1593: cc += size;
1594: break;
1595:
1.1 misho 1596: default:
1597: cc = next_opcode(common, cc);
1598: SLJIT_ASSERT(cc != NULL);
1599: break;
1600: }
1601: }
1602: SLJIT_ASSERT(cc == ccend);
1.1.1.4 misho 1603: return private_data_length;
1.1 misho 1604: }
1605:
/* Emits the code which saves (save is TRUE) or restores (save is FALSE)
   the private data of the opcodes in [cc, ccend) to / from the stack
   area running from STACK(stackptr - 2) up to STACK(stacktop - 1).

   The opcode walk matches get_private_data_copy_length. When saving, the
   first values are common->recursive_head_ptr and, when needs_control_head
   is set, common->control_head_ptr. When restoring, one stack word (two
   with needs_control_head) is skipped at the start.

   The values travel through TMP1 and TMP2 alternately: tmp1next selects
   the register used by the next move, while tmp1empty / tmp2empty tell
   whether a register holds a value which is still to be written out. */
1.1.1.4 misho 1606: static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1607: BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1.1 misho 1608: {
1609: DEFINE_COMPILER;
/* Offsets of the locals collected by one step of the walk (at most two). */
1610: int srcw[2];
1.1.1.4 misho 1611: int count, size;
1.1 misho 1612: BOOL tmp1next = TRUE;
1613: BOOL tmp1empty = TRUE;
1614: BOOL tmp2empty = TRUE;
1.1.1.2 misho 1615: pcre_uchar *alternative;
1.1 misho 1616: enum {
1617: start,
1618: loop,
1619: end
1620: } status;
1621:
/* The start state (the head pointers) is visited only when saving. */
1622: status = save ? start : loop;
1623: stackptr = STACK(stackptr - 2);
1624: stacktop = STACK(stacktop - 1);
1625:
1626: if (!save)
1627: {
1.1.1.4 misho 1628: stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
/* Preload up to two saved values into TMP1 and TMP2. */
1.1 misho 1629: if (stackptr < stacktop)
1630: {
1631: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1.1.1.4 misho 1632: stackptr += sizeof(sljit_sw);
1.1 misho 1633: tmp1empty = FALSE;
1634: }
1635: if (stackptr < stacktop)
1636: {
1637: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1.1.1.4 misho 1638: stackptr += sizeof(sljit_sw);
1.1 misho 1639: tmp2empty = FALSE;
1640: }
1641: /* The tmp1next must be TRUE in either way. */
1642: }
1643:
/* Each iteration collects zero, one or two local offsets into srcw and then
   emits the moves for them. */
1.1.1.4 misho 1644: do
1.1 misho 1645: {
1646: count = 0;
1647: switch(status)
1648: {
1649: case start:
1.1.1.4 misho 1650: SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1.1 misho 1651: count = 1;
1.1.1.4 misho 1652: srcw[0] = common->recursive_head_ptr;
1653: if (needs_control_head)
1654: {
1655: SLJIT_ASSERT(common->control_head_ptr != 0);
1656: count = 2;
1657: srcw[1] = common->control_head_ptr;
1658: }
1.1 misho 1659: status = loop;
1660: break;
1661:
1662: case loop:
1663: if (cc >= ccend)
1664: {
1665: status = end;
1666: break;
1667: }
1668:
1669: switch(*cc)
1670: {
1.1.1.4 misho 1671: case OP_KET:
1672: if (PRIVATE_DATA(cc) != 0)
1673: {
1674: count = 1;
1675: srcw[0] = PRIVATE_DATA(cc);
1676: }
1677: cc += 1 + LINK_SIZE;
1678: break;
1679:
1.1 misho 1680: case OP_ASSERT:
1681: case OP_ASSERT_NOT:
1682: case OP_ASSERTBACK:
1683: case OP_ASSERTBACK_NOT:
1684: case OP_ONCE:
1685: case OP_ONCE_NC:
1686: case OP_BRAPOS:
1687: case OP_SBRA:
1688: case OP_SBRAPOS:
1689: case OP_SCOND:
1690: count = 1;
1.1.1.4 misho 1691: srcw[0] = PRIVATE_DATA(cc);
1.1 misho 1692: SLJIT_ASSERT(srcw[0] != 0);
1693: cc += 1 + LINK_SIZE;
1694: break;
1695:
1696: case OP_CBRA:
1697: case OP_SCBRA:
1.1.1.4 misho 1698: if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1699: {
1700: count = 1;
1701: srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1702: }
1.1.1.2 misho 1703: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 1704: break;
1705:
1706: case OP_CBRAPOS:
1707: case OP_SCBRAPOS:
1708: count = 2;
1.1.1.4 misho 1709: srcw[0] = PRIVATE_DATA(cc);
1.1 misho 1710: srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1.1.1.4 misho 1711: SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1.1.1.2 misho 1712: cc += 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 1713: break;
1714:
1715: case OP_COND:
1716: /* Might be a hidden SCOND. */
1717: alternative = cc + GET(cc, 1);
1718: if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1719: {
1720: count = 1;
1.1.1.4 misho 1721: srcw[0] = PRIVATE_DATA(cc);
1.1 misho 1722: SLJIT_ASSERT(srcw[0] != 0);
1723: }
1724: cc += 1 + LINK_SIZE;
1725: break;
1726:
1.1.1.4 misho 1727: CASE_ITERATOR_PRIVATE_DATA_1
1728: if (PRIVATE_DATA(cc))
1729: {
1730: count = 1;
1731: srcw[0] = PRIVATE_DATA(cc);
1732: }
1733: cc += 2;
1734: #ifdef SUPPORT_UTF
1735: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1736: #endif
1737: break;
1738:
/* Two word iterators: the second word directly follows the first one. */
1739: CASE_ITERATOR_PRIVATE_DATA_2A
1740: if (PRIVATE_DATA(cc))
1741: {
1742: count = 2;
1743: srcw[0] = PRIVATE_DATA(cc);
1744: srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1745: }
1746: cc += 2;
1747: #ifdef SUPPORT_UTF
1748: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1749: #endif
1750: break;
1751:
1752: CASE_ITERATOR_PRIVATE_DATA_2B
1753: if (PRIVATE_DATA(cc))
1754: {
1755: count = 2;
1756: srcw[0] = PRIVATE_DATA(cc);
1757: srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1758: }
1759: cc += 2 + IMM2_SIZE;
1760: #ifdef SUPPORT_UTF
1761: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1762: #endif
1763: break;
1764:
1765: CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1766: if (PRIVATE_DATA(cc))
1767: {
1768: count = 1;
1769: srcw[0] = PRIVATE_DATA(cc);
1770: }
1771: cc += 1;
1772: break;
1773:
1774: CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1775: if (PRIVATE_DATA(cc))
1776: {
1777: count = 2;
1778: srcw[0] = PRIVATE_DATA(cc);
1779: srcw[1] = srcw[0] + sizeof(sljit_sw);
1780: }
1781: cc += 1;
1782: break;
1783:
1784: CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1785: if (PRIVATE_DATA(cc))
1786: {
1787: count = 2;
1788: srcw[0] = PRIVATE_DATA(cc);
1789: srcw[1] = srcw[0] + sizeof(sljit_sw);
1790: }
1791: cc += 1 + IMM2_SIZE;
1792: break;
1793:
1794: case OP_CLASS:
1795: case OP_NCLASS:
1796: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1797: case OP_XCLASS:
1798: size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1799: #else
1800: size = 1 + 32 / (int)sizeof(pcre_uchar);
1801: #endif
/* A class with a private slot is expected to be followed by a repeat which
   needs one or two words; anything else hits SLJIT_ASSERT_STOP(). */
1802: if (PRIVATE_DATA(cc))
1803: switch(get_class_iterator_size(cc + size))
1804: {
1805: case 1:
1806: count = 1;
1807: srcw[0] = PRIVATE_DATA(cc);
1808: break;
1809:
1810: case 2:
1811: count = 2;
1812: srcw[0] = PRIVATE_DATA(cc);
1813: srcw[1] = srcw[0] + sizeof(sljit_sw);
1814: break;
1815:
1816: default:
1817: SLJIT_ASSERT_STOP();
1818: break;
1819: }
1820: cc += size;
1821: break;
1822:
1.1 misho 1823: default:
1824: cc = next_opcode(common, cc);
1825: SLJIT_ASSERT(cc != NULL);
1826: break;
1827: }
1828: break;
1829:
1830: case end:
1831: SLJIT_ASSERT_STOP();
1832: break;
1833: }
1834:
/* srcw is consumed from the highest index down. */
1835: while (count > 0)
1836: {
1837: count--;
1838: if (save)
1839: {
/* Saving: write out the value held by the selected register (if any), then
   load the next local into it. */
1840: if (tmp1next)
1841: {
1842: if (!tmp1empty)
1843: {
1844: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1.1.1.4 misho 1845: stackptr += sizeof(sljit_sw);
1.1 misho 1846: }
1847: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1848: tmp1empty = FALSE;
1849: tmp1next = FALSE;
1850: }
1851: else
1852: {
1853: if (!tmp2empty)
1854: {
1855: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1.1.1.4 misho 1856: stackptr += sizeof(sljit_sw);
1.1 misho 1857: }
1858: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1859: tmp2empty = FALSE;
1860: tmp1next = TRUE;
1861: }
1862: }
1863: else
1864: {
/* Restoring: store the preloaded register into the local, then refill the
   register from the stack while saved words remain. */
1865: if (tmp1next)
1866: {
1867: SLJIT_ASSERT(!tmp1empty);
1868: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1869: tmp1empty = stackptr >= stacktop;
1870: if (!tmp1empty)
1871: {
1872: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1.1.1.4 misho 1873: stackptr += sizeof(sljit_sw);
1.1 misho 1874: }
1875: tmp1next = FALSE;
1876: }
1877: else
1878: {
1879: SLJIT_ASSERT(!tmp2empty);
1880: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1881: tmp2empty = stackptr >= stacktop;
1882: if (!tmp2empty)
1883: {
1884: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1.1.1.4 misho 1885: stackptr += sizeof(sljit_sw);
1.1 misho 1886: }
1887: tmp1next = TRUE;
1888: }
1889: }
1890: }
1891: }
1.1.1.4 misho 1892: while (status != end);
1.1 misho 1893:
/* Saving: write out what is still held in the registers, starting with the
   register that tmp1next selects. */
1894: if (save)
1895: {
1896: if (tmp1next)
1897: {
1898: if (!tmp1empty)
1899: {
1900: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1.1.1.4 misho 1901: stackptr += sizeof(sljit_sw);
1.1 misho 1902: }
1903: if (!tmp2empty)
1904: {
1905: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1.1.1.4 misho 1906: stackptr += sizeof(sljit_sw);
1.1 misho 1907: }
1908: }
1909: else
1910: {
1911: if (!tmp2empty)
1912: {
1913: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1.1.1.4 misho 1914: stackptr += sizeof(sljit_sw);
1.1 misho 1915: }
1916: if (!tmp1empty)
1917: {
1918: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1.1.1.4 misho 1919: stackptr += sizeof(sljit_sw);
1.1 misho 1920: }
1921: }
1922: }
1923: SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1924: }
1925:
1.1.1.4 misho 1926: static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1927: {
1928: pcre_uchar *end = bracketend(cc);
1929: BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1930:
1931: /* Assert captures then. */
1932: if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1933: current_offset = NULL;
1934: /* Conditional block does not. */
1935: if (*cc == OP_COND || *cc == OP_SCOND)
1936: has_alternatives = FALSE;
1937:
1938: cc = next_opcode(common, cc);
1939: if (has_alternatives)
1940: current_offset = common->then_offsets + (cc - common->start);
1941:
1942: while (cc < end)
1943: {
1944: if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1945: cc = set_then_offsets(common, cc, current_offset);
1946: else
1947: {
1948: if (*cc == OP_ALT && has_alternatives)
1949: current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1950: if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1951: *current_offset = 1;
1952: cc = next_opcode(common, cc);
1953: }
1954: }
1955:
1956: return end;
1957: }
1958:
/* The case label groups are not used after this point. */
1958: #undef CASE_ITERATOR_PRIVATE_DATA_1
1959: #undef CASE_ITERATOR_PRIVATE_DATA_2A
1960: #undef CASE_ITERATOR_PRIVATE_DATA_2B
1961: #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1962: #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1963: #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1965:
1966: static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1.1 misho 1967: {
1968: return (value & (value - 1)) == 0;
1969: }
1970:
1971: static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1972: {
1973: while (list)
1974: {
1975: /* sljit_set_label is clever enough to do nothing
1.1.1.4 misho 1976: if either the jump or the label is NULL. */
1977: SET_LABEL(list->jump, label);
1.1 misho 1978: list = list->next;
1979: }
1980: }
1981:
1982: static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1983: {
1984: jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1985: if (list_item)
1986: {
1987: list_item->next = *list;
1988: list_item->jump = jump;
1989: *list = list_item;
1990: }
1991: }
1992:
1.1.1.4 misho 1993: static void add_stub(compiler_common *common, struct sljit_jump *start)
1.1 misho 1994: {
1995: DEFINE_COMPILER;
1996: stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1997:
1998: if (list_item)
1999: {
2000: list_item->start = start;
1.1.1.4 misho 2001: list_item->quit = LABEL();
1.1 misho 2002: list_item->next = common->stubs;
2003: common->stubs = list_item;
2004: }
2005: }
2006:
2007: static void flush_stubs(compiler_common *common)
2008: {
2009: DEFINE_COMPILER;
2010: stub_list* list_item = common->stubs;
2011:
2012: while (list_item)
2013: {
2014: JUMPHERE(list_item->start);
1.1.1.4 misho 2015: add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2016: JUMPTO(SLJIT_JUMP, list_item->quit);
1.1 misho 2017: list_item = list_item->next;
2018: }
2019: common->stubs = NULL;
2020: }
2021:
1.1.1.4 misho 2022: static SLJIT_INLINE void count_match(compiler_common *common)
1.1 misho 2023: {
2024: DEFINE_COMPILER;
2025:
1.1.1.4 misho 2026: OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
1.1 misho 2027: add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2028: }
2029:
2030: static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2031: {
2032: /* May destroy all locals and registers except TMP2. */
2033: DEFINE_COMPILER;
2034:
1.1.1.4 misho 2035: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1.1 misho 2036: #ifdef DESTROY_REGISTERS
2037: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2038: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2039: OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2040: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2041: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2042: #endif
1.1.1.4 misho 2043: add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1.1 misho 2044: }
2045:
2046: static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2047: {
2048: DEFINE_COMPILER;
1.1.1.4 misho 2049: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1.1 misho 2050: }
2051:
2052: static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2053: {
2054: DEFINE_COMPILER;
2055: struct sljit_label *loop;
2056: int i;
1.1.1.4 misho 2057:
1.1 misho 2058: /* At this point we can freely use all temporary registers. */
1.1.1.4 misho 2059: SLJIT_ASSERT(length > 1);
1.1 misho 2060: /* TMP1 returns with begin - 1. */
1.1.1.4 misho 2061: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2062: if (length < 8)
2063: {
1.1.1.4 misho 2064: for (i = 1; i < length; i++)
2065: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1.1 misho 2066: }
2067: else
2068: {
1.1.1.4 misho 2069: GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2070: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1.1 misho 2071: loop = LABEL();
1.1.1.4 misho 2072: OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2073: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1.1 misho 2074: JUMPTO(SLJIT_C_NOT_ZERO, loop);
2075: }
2076: }
2077:
1.1.1.4 misho 2078: static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2079: {
2080: DEFINE_COMPILER;
2081: struct sljit_label *loop;
2082: int i;
2083:
2084: SLJIT_ASSERT(length > 1);
2085: /* OVECTOR(1) contains the "string begin - 1" constant. */
2086: if (length > 2)
2087: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2088: if (length < 8)
2089: {
2090: for (i = 2; i < length; i++)
2091: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2092: }
2093: else
2094: {
2095: GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2096: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2097: loop = LABEL();
2098: OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2099: OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2100: JUMPTO(SLJIT_C_NOT_ZERO, loop);
2101: }
2102:
2103: OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2104: if (common->mark_ptr != 0)
2105: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2106: if (common->control_head_ptr != 0)
2107: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2108: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2109: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2110: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2111: }
2112:
2113: static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2114: {
2115: while (current != NULL)
2116: {
2117: switch (current[-2])
2118: {
2119: case type_then_trap:
2120: break;
2121:
2122: case type_mark:
2123: if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2124: return current[-4];
2125: break;
2126:
2127: default:
2128: SLJIT_ASSERT_STOP();
2129: break;
2130: }
2131: current = (sljit_sw*)current[-1];
2132: }
2133: return -1;
2134: }
2135:
1.1 misho 2136: static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2137: {
2138: DEFINE_COMPILER;
2139: struct sljit_label *loop;
1.1.1.4 misho 2140: struct sljit_jump *early_quit;
1.1 misho 2141:
2142: /* At this point we can freely use all registers. */
1.1.1.2 misho 2143: OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1.1 misho 2144: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2145:
1.1.1.4 misho 2146: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1.1.1.3 misho 2147: if (common->mark_ptr != 0)
1.1.1.4 misho 2148: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2149: OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1.1.1.3 misho 2150: if (common->mark_ptr != 0)
1.1.1.4 misho 2151: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2152: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2153: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1.1.1.3 misho 2154: GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1.1 misho 2155: /* Unlikely, but possible */
1.1.1.4 misho 2156: early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1.1 misho 2157: loop = LABEL();
1.1.1.4 misho 2158: OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2159: OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1.1 misho 2160: /* Copy the integer value to the output buffer */
1.1.1.4 misho 2161: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2162: OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1.1.1.2 misho 2163: #endif
1.1.1.4 misho 2164: OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2165: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1.1 misho 2166: JUMPTO(SLJIT_C_NOT_ZERO, loop);
1.1.1.4 misho 2167: JUMPHERE(early_quit);
1.1 misho 2168:
2169: /* Calculate the return value, which is the maximum ovector value. */
2170: if (topbracket > 1)
2171: {
1.1.1.4 misho 2172: GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2173: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1.1 misho 2174:
1.1.1.2 misho 2175: /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1.1 misho 2176: loop = LABEL();
1.1.1.4 misho 2177: OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2178: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2179: CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2180: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1.1 misho 2181: }
2182: else
2183: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2184: }
2185:
1.1.1.4 misho 2186: static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1.1.1.3 misho 2187: {
2188: DEFINE_COMPILER;
1.1.1.4 misho 2189: struct sljit_jump *jump;
1.1.1.3 misho 2190:
2191: SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1.1.1.4 misho 2192: SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2193: && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1.1.1.3 misho 2194:
1.1.1.4 misho 2195: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1.1.1.3 misho 2196: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1.1.1.4 misho 2197: OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2198: CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1.1.1.3 misho 2199:
2200: /* Store match begin and end. */
1.1.1.4 misho 2201: OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2202: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2203:
2204: jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2205: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2206: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2207: OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2208: #endif
2209: OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2210: JUMPHERE(jump);
2211:
2212: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1.1.1.3 misho 2213: OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1.1.1.4 misho 2214: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2215: OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1.1.1.3 misho 2216: #endif
1.1.1.4 misho 2217: OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1.1.1.3 misho 2218:
1.1.1.4 misho 2219: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2220: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2221: OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1.1.1.3 misho 2222: #endif
1.1.1.4 misho 2223: OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1.1.1.3 misho 2224:
1.1.1.4 misho 2225: JUMPTO(SLJIT_JUMP, quit);
1.1.1.3 misho 2226: }
2227:
2228: static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2229: {
2230: /* May destroy TMP1. */
2231: DEFINE_COMPILER;
2232: struct sljit_jump *jump;
2233:
2234: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2235: {
2236: /* The value of -1 must be kept for start_used_ptr! */
2237: OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2238: /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2239: is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2240: jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2241: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2242: JUMPHERE(jump);
2243: }
2244: else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2245: {
2246: jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2247: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2248: JUMPHERE(jump);
2249: }
2250: }
2251:
1.1.1.2 misho 2252: static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1.1 misho 2253: {
2254: /* Detects if the character has an othercase. */
2255: unsigned int c;
2256:
1.1.1.2 misho 2257: #ifdef SUPPORT_UTF
2258: if (common->utf)
1.1 misho 2259: {
2260: GETCHAR(c, cc);
2261: if (c > 127)
2262: {
2263: #ifdef SUPPORT_UCP
2264: return c != UCD_OTHERCASE(c);
2265: #else
2266: return FALSE;
2267: #endif
2268: }
1.1.1.2 misho 2269: #ifndef COMPILE_PCRE8
2270: return common->fcc[c] != c;
2271: #endif
1.1 misho 2272: }
2273: else
2274: #endif
2275: c = *cc;
1.1.1.2 misho 2276: return MAX_255(c) ? common->fcc[c] != c : FALSE;
1.1 misho 2277: }
2278:
2279: static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2280: {
2281: /* Returns with the othercase. */
1.1.1.2 misho 2282: #ifdef SUPPORT_UTF
2283: if (common->utf && c > 127)
1.1 misho 2284: {
2285: #ifdef SUPPORT_UCP
2286: return UCD_OTHERCASE(c);
2287: #else
2288: return c;
2289: #endif
2290: }
2291: #endif
1.1.1.2 misho 2292: return TABLE_GET(c, common->fcc, c);
1.1 misho 2293: }
2294:
1.1.1.2 misho 2295: static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1.1 misho 2296: {
2297: /* Detects if the character and its othercase has only 1 bit difference. */
2298: unsigned int c, oc, bit;
1.1.1.2 misho 2299: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1.1 misho 2300: int n;
2301: #endif
2302:
1.1.1.2 misho 2303: #ifdef SUPPORT_UTF
2304: if (common->utf)
1.1 misho 2305: {
2306: GETCHAR(c, cc);
2307: if (c <= 127)
2308: oc = common->fcc[c];
2309: else
2310: {
2311: #ifdef SUPPORT_UCP
2312: oc = UCD_OTHERCASE(c);
2313: #else
2314: oc = c;
2315: #endif
2316: }
2317: }
2318: else
2319: {
2320: c = *cc;
1.1.1.2 misho 2321: oc = TABLE_GET(c, common->fcc, c);
1.1 misho 2322: }
2323: #else
2324: c = *cc;
1.1.1.2 misho 2325: oc = TABLE_GET(c, common->fcc, c);
1.1 misho 2326: #endif
2327:
2328: SLJIT_ASSERT(c != oc);
2329:
2330: bit = c ^ oc;
2331: /* Optimized for English alphabet. */
2332: if (c <= 127 && bit == 0x20)
2333: return (0 << 8) | 0x20;
2334:
2335: /* Since c != oc, they must have at least 1 bit difference. */
1.1.1.4 misho 2336: if (!is_powerof2(bit))
1.1 misho 2337: return 0;
2338:
1.1.1.4 misho 2339: #if defined COMPILE_PCRE8
1.1.1.2 misho 2340:
2341: #ifdef SUPPORT_UTF
2342: if (common->utf && c > 127)
1.1 misho 2343: {
1.1.1.2 misho 2344: n = GET_EXTRALEN(*cc);
1.1 misho 2345: while ((bit & 0x3f) == 0)
2346: {
2347: n--;
2348: bit >>= 6;
2349: }
2350: return (n << 8) | bit;
2351: }
1.1.1.2 misho 2352: #endif /* SUPPORT_UTF */
1.1 misho 2353: return (0 << 8) | bit;
1.1.1.2 misho 2354:
1.1.1.4 misho 2355: #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1.1.2 misho 2356:
2357: #ifdef SUPPORT_UTF
2358: if (common->utf && c > 65535)
2359: {
2360: if (bit >= (1 << 10))
2361: bit >>= 10;
2362: else
2363: return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2364: }
2365: #endif /* SUPPORT_UTF */
2366: return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2367:
1.1.1.4 misho 2368: #endif /* COMPILE_PCRE[8|16|32] */
1.1 misho 2369: }
2370:
1.1.1.3 misho 2371: static void check_partial(compiler_common *common, BOOL force)
2372: {
1.1.1.4 misho 2373: /* Checks whether a partial matching is occurred. Does not modify registers. */
1.1.1.3 misho 2374: DEFINE_COMPILER;
2375: struct sljit_jump *jump = NULL;
2376:
2377: SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2378:
2379: if (common->mode == JIT_COMPILE)
2380: return;
2381:
2382: if (!force)
2383: jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2384: else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2385: jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2386:
2387: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1.1.1.4 misho 2388: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
1.1.1.3 misho 2389: else
2390: {
2391: if (common->partialmatchlabel != NULL)
2392: JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2393: else
2394: add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2395: }
2396:
2397: if (jump != NULL)
2398: JUMPHERE(jump);
2399: }
2400:
1.1.1.4 misho 2401: static void check_str_end(compiler_common *common, jump_list **end_reached)
1.1.1.3 misho 2402: {
2403: /* Does not affect registers. Usually used in a tight spot. */
2404: DEFINE_COMPILER;
2405: struct sljit_jump *jump;
2406:
2407: if (common->mode == JIT_COMPILE)
1.1.1.4 misho 2408: {
2409: add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2410: return;
2411: }
1.1.1.3 misho 2412:
2413: jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2414: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2415: {
1.1.1.4 misho 2416: add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2417: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2418: add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
1.1.1.3 misho 2419: }
2420: else
2421: {
1.1.1.4 misho 2422: add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1.1.1.3 misho 2423: if (common->partialmatchlabel != NULL)
2424: JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2425: else
2426: add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2427: }
2428: JUMPHERE(jump);
2429: }
2430:
2431: static void detect_partial_match(compiler_common *common, jump_list **backtracks)
1.1 misho 2432: {
2433: DEFINE_COMPILER;
1.1.1.3 misho 2434: struct sljit_jump *jump;
2435:
2436: if (common->mode == JIT_COMPILE)
2437: {
2438: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2439: return;
2440: }
2441:
2442: /* Partial matching mode. */
2443: jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2444: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2445: if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2446: {
1.1.1.4 misho 2447: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
1.1.1.3 misho 2448: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2449: }
2450: else
2451: {
2452: if (common->partialmatchlabel != NULL)
2453: JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2454: else
2455: add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2456: }
2457: JUMPHERE(jump);
1.1 misho 2458: }
2459:
2460: static void read_char(compiler_common *common)
2461: {
2462: /* Reads the character into TMP1, updates STR_PTR.
2463: Does not check STR_END. TMP2 Destroyed. */
2464: DEFINE_COMPILER;
1.1.1.4 misho 2465: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1 misho 2466: struct sljit_jump *jump;
2467: #endif
2468:
1.1.1.2 misho 2469: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1.1.4 misho 2470: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.2 misho 2471: if (common->utf)
1.1 misho 2472: {
1.1.1.4 misho 2473: #if defined COMPILE_PCRE8
1.1 misho 2474: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1.1.1.4 misho 2475: #elif defined COMPILE_PCRE16
1.1.1.2 misho 2476: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1.1.1.4 misho 2477: #endif /* COMPILE_PCRE[8|16] */
1.1.1.2 misho 2478: add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1.1 misho 2479: JUMPHERE(jump);
2480: }
1.1.1.4 misho 2481: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
1.1.1.2 misho 2482: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2483: }
2484:
2485: static void peek_char(compiler_common *common)
2486: {
2487: /* Reads the character into TMP1, keeps STR_PTR.
2488: Does not check STR_END. TMP2 Destroyed. */
2489: DEFINE_COMPILER;
1.1.1.4 misho 2490: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1 misho 2491: struct sljit_jump *jump;
2492: #endif
2493:
1.1.1.2 misho 2494: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1.1.4 misho 2495: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.2 misho 2496: if (common->utf)
1.1 misho 2497: {
1.1.1.4 misho 2498: #if defined COMPILE_PCRE8
1.1 misho 2499: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1.1.1.4 misho 2500: #elif defined COMPILE_PCRE16
1.1.1.2 misho 2501: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1.1.1.4 misho 2502: #endif /* COMPILE_PCRE[8|16] */
1.1.1.2 misho 2503: add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1.1 misho 2504: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2505: JUMPHERE(jump);
2506: }
1.1.1.4 misho 2507: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
1.1 misho 2508: }
2509:
2510: static void read_char8_type(compiler_common *common)
2511: {
2512: /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2513: DEFINE_COMPILER;
1.1.1.4 misho 2514: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1 misho 2515: struct sljit_jump *jump;
2516: #endif
2517:
1.1.1.2 misho 2518: #ifdef SUPPORT_UTF
2519: if (common->utf)
1.1 misho 2520: {
1.1.1.2 misho 2521: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2522: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 2523: #if defined COMPILE_PCRE8
1.1 misho 2524: /* This can be an extra read in some situations, but hopefully
1.1.1.2 misho 2525: it is needed in most cases. */
1.1 misho 2526: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2527: jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1.1.1.2 misho 2528: add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2529: JUMPHERE(jump);
1.1.1.4 misho 2530: #elif defined COMPILE_PCRE16
1.1.1.2 misho 2531: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2532: jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2533: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1.1 misho 2534: JUMPHERE(jump);
1.1.1.2 misho 2535: /* Skip low surrogate if necessary. */
2536: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2537: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1.1.1.4 misho 2538: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1.1.2 misho 2539: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2540: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1.1.1.4 misho 2541: #elif defined COMPILE_PCRE32
2542: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2543: jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2544: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545: JUMPHERE(jump);
2546: #endif /* COMPILE_PCRE[8|16|32] */
1.1 misho 2547: return;
2548: }
1.1.1.4 misho 2549: #endif /* SUPPORT_UTF */
1.1.1.2 misho 2550: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2551: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 2552: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1.1.2 misho 2553: /* The ctypes array contains only 256 values. */
2554: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2555: jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2556: #endif
2557: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1.1.1.4 misho 2558: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1.1.2 misho 2559: JUMPHERE(jump);
2560: #endif
1.1 misho 2561: }
2562:
2563: static void skip_char_back(compiler_common *common)
2564: {
1.1.1.2 misho 2565: /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1.1 misho 2566: DEFINE_COMPILER;
1.1.1.4 misho 2567: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2568: #if defined COMPILE_PCRE8
1.1 misho 2569: struct sljit_label *label;
2570:
1.1.1.2 misho 2571: if (common->utf)
1.1 misho 2572: {
2573: label = LABEL();
1.1.1.2 misho 2574: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2575: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2576: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2577: CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2578: return;
2579: }
1.1.1.4 misho 2580: #elif defined COMPILE_PCRE16
1.1.1.2 misho 2581: if (common->utf)
2582: {
2583: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2584: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585: /* Skip low surrogate if necessary. */
2586: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2587: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1.1.1.4 misho 2588: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1.1.2 misho 2589: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2590: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2591: return;
2592: }
1.1.1.4 misho 2593: #endif /* COMPILE_PCRE[8|16] */
2594: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
1.1.1.2 misho 2595: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2596: }
2597:
1.1.1.3 misho 2598: static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
1.1 misho 2599: {
2600: /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2601: DEFINE_COMPILER;
2602:
2603: if (nltype == NLTYPE_ANY)
2604: {
2605: add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1.1.1.3 misho 2606: add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1.1 misho 2607: }
2608: else if (nltype == NLTYPE_ANYCRLF)
2609: {
2610: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1.1.1.4 misho 2611: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1 misho 2612: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1.1.1.4 misho 2613: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1.1.3 misho 2614: add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1.1 misho 2615: }
2616: else
2617: {
1.1.1.2 misho 2618: SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1.1.1.3 misho 2619: add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1.1 misho 2620: }
2621: }
2622:
1.1.1.2 misho 2623: #ifdef SUPPORT_UTF
2624:
1.1.1.4 misho 2625: #if defined COMPILE_PCRE8
1.1.1.2 misho 2626: static void do_utfreadchar(compiler_common *common)
1.1 misho 2627: {
1.1.1.2 misho 2628: /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1.1 misho 2629: of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2630: DEFINE_COMPILER;
2631: struct sljit_jump *jump;
2632:
1.1.1.3 misho 2633: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 2634: /* Searching for the first zero. */
2635: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2636: jump = JUMP(SLJIT_C_NOT_ZERO);
1.1.1.2 misho 2637: /* Two byte sequence. */
2638: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2639: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2640: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2641: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2642: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2643: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.2 misho 2644: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2645: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2646: JUMPHERE(jump);
2647:
2648: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2649: jump = JUMP(SLJIT_C_NOT_ZERO);
1.1.1.2 misho 2650: /* Three byte sequence. */
2651: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1.1 misho 2652: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2653: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2654: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2655: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2656: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.2 misho 2657: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2658: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1.1 misho 2659: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.2 misho 2661: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1.1 misho 2662: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2663: JUMPHERE(jump);
2664:
1.1.1.2 misho 2665: /* Four byte sequence. */
2666: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1.1 misho 2667: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2668: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2669: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2670: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2671: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.2 misho 2672: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1.1 misho 2673: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2674: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2675: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.2 misho 2676: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2677: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1.1 misho 2678: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.2 misho 2680: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1.1 misho 2681: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2682: }
2683:
1.1.1.2 misho 2684: static void do_utfreadtype8(compiler_common *common)
1.1 misho 2685: {
1.1.1.2 misho 2686: /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2687: of the character (>= 0xc0). Return value in TMP1. */
1.1 misho 2688: DEFINE_COMPILER;
2689: struct sljit_jump *jump;
2690: struct sljit_jump *compare;
2691:
1.1.1.3 misho 2692: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 2693:
2694: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2695: jump = JUMP(SLJIT_C_NOT_ZERO);
1.1.1.2 misho 2696: /* Two byte sequence. */
2697: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2698: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2699: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2700: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2701: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2702: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2703: compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2704: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2705: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2706:
2707: JUMPHERE(compare);
2708: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2709: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2710: JUMPHERE(jump);
2711:
2712: /* We only have types for characters less than 256. */
1.1.1.4 misho 2713: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
1.1 misho 2714: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2715: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2716: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2717: }
2718:
1.1.1.4 misho 2719: #elif defined COMPILE_PCRE16
1.1.1.2 misho 2720:
2721: static void do_utfreadchar(compiler_common *common)
2722: {
2723: /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2724: of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2725: DEFINE_COMPILER;
2726: struct sljit_jump *jump;
2727:
1.1.1.3 misho 2728: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1.1.2 misho 2729: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2730: /* Do nothing, only return. */
2731: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2732:
2733: JUMPHERE(jump);
2734: /* Combine two 16 bit characters. */
2735: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2736: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2737: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2738: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2739: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2740: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2741: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2742: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2743: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2744: }
2745:
1.1.1.4 misho 2746: #endif /* COMPILE_PCRE[8|16] */
1.1.1.2 misho 2747:
2748: #endif /* SUPPORT_UTF */
1.1 misho 2749:
2750: #ifdef SUPPORT_UCP
2751:
2752: /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2753: #define UCD_BLOCK_MASK 127
2754: #define UCD_BLOCK_SHIFT 7
2755:
2756: static void do_getucd(compiler_common *common)
2757: {
2758: /* Search the UCD record for the character that comes in TMP1.
2759: Returns chartype in TMP1 and UCD offset in TMP2. */
/* The emitted code performs a two-stage table lookup:
  block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]               (byte load)
  index  = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
                                                          (halfword load)
  result = the chartype byte of ucd_records[index]
The last argument of the SLJIT_MEM2 operands (1 and 3) appears to be a shift
applied to the index register, i.e. scaling by 2 for the halfword table and by
8 for the records; the latter agrees with the sizeof(ucd_record) == 8
assertion below. NOTE(review): confirm against the sljit documentation. */
2760: DEFINE_COMPILER;
2761:
2762: SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2763:
1.1.1.3 misho 2764: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 2765: OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1.1.1.4 misho 2766: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
1.1 misho 2767: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2768: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2769: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1.1.1.4 misho 2770: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
1.1 misho 2771: OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* The base address already includes the offset of the chartype member, so the
byte load below reads that field directly. */
1.1.1.4 misho 2772: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1.1 misho 2773: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2774: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2775: }
2776: #endif
2777:
2778: static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2779: {
/* Emits the entry code of the main matching loop and returns the label
(mainloop) at which STR_PTR is advanced by one character.

When firstline is set, code is emitted first that locates the end of the first
line and stores it in the local slot common->first_line_end; STR_PTR is
preserved across that scan in TMP3.

The initial flow jumps over the advancing code (the "start" jump), so STR_PTR
is not moved on first entry. The advancing code steps one code unit, plus the
remaining units of a multi-unit UTF-8 / UTF-16 character, and, when
newlinecheck is enabled, steps over a two-unit newline as a whole. */
2780: DEFINE_COMPILER;
2781: struct sljit_label *mainloop;
2782: struct sljit_label *newlinelabel = NULL;
2783: struct sljit_jump *start;
2784: struct sljit_jump *end = NULL;
2785: struct sljit_jump *nl = NULL;
1.1.1.4 misho 2786: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.2 misho 2787: struct sljit_jump *singlechar;
1.1 misho 2788: #endif
2789: jump_list *newline = NULL;
2790: BOOL newlinecheck = FALSE;
1.1.1.2 misho 2791: BOOL readuchar = FALSE;
1.1 misho 2792:
/* The two-unit newline skip is only generated when neither hascrorlf nor
firstline is set. */
2793: if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2794: common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2795: newlinecheck = TRUE;
2796:
2797: if (firstline)
2798: {
2799: /* Search for the end of the first line. */
1.1.1.3 misho 2800: SLJIT_ASSERT(common->first_line_end != 0);
1.1.1.4 misho 2801: OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
1.1 misho 2802:
2803: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2804: {
/* Fixed two-unit newline: compare the units at STR_PTR - 1 and STR_PTR with
the high and low byte of common->newline. On exit the stored value is
STR_PTR minus one unit. */
2805: mainloop = LABEL();
1.1.1.2 misho 2806: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2807: end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1.1.1.2 misho 2808: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2809: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1.1 misho 2810: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2811: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1.1.1.4 misho 2812: JUMPHERE(end);
1.1.1.3 misho 2813: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2814: }
2815: else
2816: {
/* Other newline types: the position before each character is stored, so that
when check_newlinechar takes one of its "newline" jumps the slot holds the
start of the newline; if the subject end is reached instead, the end position
is stored. */
2817: end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2818: mainloop = LABEL();
2819: /* Continual stores does not cause data dependency. */
1.1.1.3 misho 2820: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
1.1 misho 2821: read_char(common);
2822: check_newlinechar(common, common->nltype, &newline, TRUE);
2823: CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1.1.1.4 misho 2824: JUMPHERE(end);
1.1.1.3 misho 2825: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
1.1 misho 2826: set_jumps(newline, LABEL());
2827: }
2828:
/* Restore the subject position saved before the scan. */
1.1.1.4 misho 2829: OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
1.1 misho 2830: }
2831:
/* First entry: skip the advancing code below. */
2832: start = JUMP(SLJIT_JUMP);
2833:
2834: if (newlinecheck)
2835: {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step over it and, unless the end is reached, also over the
next unit if that equals the low byte. The 0/1 flag value is scaled to the
code unit size in the 16 and 32 bit libraries. */
2836: newlinelabel = LABEL();
1.1.1.2 misho 2837: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 2838: end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1.1.1.2 misho 2839: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1 misho 2840: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1.1.1.4 misho 2841: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2842: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2843: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
1.1.1.2 misho 2844: #endif
1.1 misho 2845: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2846: nl = JUMP(SLJIT_JUMP);
2847: }
2848:
2849: mainloop = LABEL();
2850:
2851: /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit only needs to be loaded when it is inspected below, i.e. in
UTF mode or when the newline check is active. */
1.1.1.2 misho 2852: #ifdef SUPPORT_UTF
2853: if (common->utf) readuchar = TRUE;
1.1 misho 2854: #endif
1.1.1.2 misho 2855: if (newlinecheck) readuchar = TRUE;
1.1 misho 2856:
1.1.1.2 misho 2857: if (readuchar)
2858: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1 misho 2859:
2860: if (newlinecheck)
2861: CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2862:
1.1.1.2 misho 2863: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 2864: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2865: #if defined COMPILE_PCRE8
1.1.1.2 misho 2866: if (common->utf)
1.1 misho 2867: {
/* UTF-8: for lead bytes >= 0xc0 add the value found in utf8_table4 (indexed by
lead byte - 0xc0) to STR_PTR. */
1.1.1.2 misho 2868: singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1.1.1.4 misho 2869: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
1.1 misho 2870: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1.1.1.2 misho 2871: JUMPHERE(singlechar);
2872: }
1.1.1.4 misho 2873: #elif defined COMPILE_PCRE16
1.1.1.2 misho 2874: if (common->utf)
2875: {
/* UTF-16: if (unit & 0xfc00) == 0xd800, skip one more unit (the 0/1 flag is
shifted left by one to give a byte count of two). */
2876: singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2877: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2878: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1.1.1.4 misho 2879: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1.1.2 misho 2880: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2881: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2882: JUMPHERE(singlechar);
1.1 misho 2883: }
1.1.1.4 misho 2884: #endif /* COMPILE_PCRE[8|16] */
2885: #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* All paths (first entry, normal advance, newline skip) continue from here. */
1.1 misho 2886: JUMPHERE(start);
2887:
2888: if (newlinecheck)
2889: {
2890: JUMPHERE(end);
2891: JUMPHERE(nl);
2892: }
2893:
2894: return mainloop;
2895: }
2896:
1.1.1.4 misho 2897: #define MAX_N_CHARS 3
2898:
2899: static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2900: {
/* Tries to collect the first two or three (MAX_N_CHARS) fixed code units of the
pattern and, on success, emits a loop that advances STR_PTR until the subject
matches them (or the end is reached).

Returns TRUE when the search code was emitted. Returns FALSE, emitting
nothing, when the pattern has a top-level alternative, when a caseless
character cannot be handled with a single OR mask (char_get_othercase_bit
returns 0), or when fewer than two code units could be collected.

chars[] holds pairs: chars[2*i] is the expected value (with the mask already
OR-ed in) and chars[2*i + 1] is the mask applied to the subject unit (0 when
the unit is compared as is). */
2901: DEFINE_COMPILER;
2902: struct sljit_label *start;
2903: struct sljit_jump *quit;
2904: pcre_uint32 chars[MAX_N_CHARS * 2];
2905: pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2906: int location = 0;
2907: pcre_int32 len, c, bit, caseless;
2908: int must_stop;
2909:
2910: /* We do not support alternatives now. */
2911: if (*(common->start + GET(common->start, 1)) == OP_ALT)
2912: return FALSE;
2913:
2914: while (TRUE)
2915: {
/* must_stop: 0 = a literal was consumed and scanning may continue,
1 = consume the character that follows this opcode and then stop,
2 = unsupported opcode, stop without consuming anything. */
2916: caseless = 0;
2917: must_stop = 1;
2918: switch(*cc)
2919: {
2920: case OP_CHAR:
2921: must_stop = 0;
2922: cc++;
2923: break;
2924:
2925: case OP_CHARI:
2926: caseless = 1;
2927: must_stop = 0;
2928: cc++;
2929: break;
2930:
2931: case OP_SOD:
2932: case OP_SOM:
2933: case OP_SET_SOM:
2934: case OP_NOT_WORD_BOUNDARY:
2935: case OP_WORD_BOUNDARY:
2936: case OP_EODN:
2937: case OP_EOD:
2938: case OP_CIRC:
2939: case OP_CIRCM:
2940: case OP_DOLL:
2941: case OP_DOLLM:
2942: /* Zero width assertions. */
2943: cc++;
2944: continue;
2945:
2946: case OP_PLUS:
2947: case OP_MINPLUS:
2948: case OP_POSPLUS:
2949: cc++;
2950: break;
2951:
2952: case OP_EXACT:
2953: cc += 1 + IMM2_SIZE;
2954: break;
2955:
2956: case OP_PLUSI:
2957: case OP_MINPLUSI:
2958: case OP_POSPLUSI:
2959: caseless = 1;
2960: cc++;
2961: break;
2962:
2963: case OP_EXACTI:
2964: caseless = 1;
2965: cc += 1 + IMM2_SIZE;
2966: break;
2967:
2968: default:
2969: must_stop = 2;
2970: break;
2971: }
2972:
2973: if (must_stop == 2)
2974: break;
2975:
/* len = number of code units of the character at cc. */
2976: len = 1;
2977: #ifdef SUPPORT_UTF
2978: if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2979: #endif
2980:
2981: if (caseless && char_has_othercase(common, cc))
2982: {
/* Repack the value returned by char_get_othercase_bit: afterwards the low
byte holds the value of len (counting down in the loop below) at which the
mask applies, and the bits above hold the mask itself. */
2983: caseless = char_get_othercase_bit(common, cc);
2984: if (caseless == 0)
2985: return FALSE;
2986: #ifdef COMPILE_PCRE8
2987: caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2988: #else
2989: if ((caseless & 0x100) != 0)
2990: caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2991: else
2992: caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2993: #endif
2994: }
2995: else
2996: caseless = 0;
2997:
/* Copy the code units of this character into chars[], bounded by the
array size. */
2998: while (len > 0 && location < MAX_N_CHARS * 2)
2999: {
3000: c = *cc;
3001: bit = 0;
3002: if (len == (caseless & 0xff))
3003: {
3004: bit = caseless >> 8;
3005: c |= bit;
3006: }
3007:
3008: chars[location] = c;
3009: chars[location + 1] = bit;
3010:
3011: len--;
3012: location += 2;
3013: cc++;
3014: }
3015:
3016: if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3017: break;
3018: }
3019:
3020: /* At least two characters are required. */
3021: if (location < 2 * 2)
3022: return FALSE;
3023:
/* STR_END is temporarily lowered by (number of units - 1) so that the loop can
read all units without a separate bounds check. With firstline the limit is
derived from the first_line_end slot and the real STR_END is kept in TMP3. */
3024: if (firstline)
3025: {
3026: SLJIT_ASSERT(common->first_line_end != 0);
3027: OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3028: OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3029: }
3030: else
3031: OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3032:
3033: start = LABEL();
3034: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3035:
/* Units 0 and 1 are loaded into TMP1 and TMP2 before STR_PTR is advanced; any
mismatch jumps back to start with STR_PTR already moved to the next unit. */
3036: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3037: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3038: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3039: if (chars[1] != 0)
3040: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3041: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* With three units, TMP1 is reused for the third one (offset 1 relative to the
already advanced STR_PTR). */
3042: if (location > 2 * 2)
3043: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3044: if (chars[3] != 0)
3045: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3046: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3047: if (location > 2 * 2)
3048: {
3049: if (chars[5] != 0)
3050: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3051: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3052: }
/* Full match: undo the advance so STR_PTR points at the first matching unit. */
3053: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3054:
3055: JUMPHERE(quit);
3056:
/* Undo the STR_END adjustment made before the loop. */
3057: if (firstline)
3058: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3059: else
3060: OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3061: return TRUE;
3062: }
3063:
3064: #undef MAX_N_CHARS
3065:
1.1.1.2 misho 3066: static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
1.1 misho 3067: {
/* Emits a loop that advances STR_PTR one code unit at a time until the unit at
STR_PTR equals first_char (or, when caseless is set, its other case), or until
STR_PTR reaches STR_END.

With firstline, STR_END is replaced by the value stored in the
first_line_end slot for the duration of the loop; the real STR_END is kept in
TMP3 and restored at the end. TMP1 and TMP2 are used as scratch registers. */
3068: DEFINE_COMPILER;
3069: struct sljit_label *start;
1.1.1.4 misho 3070: struct sljit_jump *quit;
1.1 misho 3071: struct sljit_jump *found;
1.1.1.2 misho 3072: pcre_uchar oc, bit;
1.1 misho 3073:
3074: if (firstline)
3075: {
1.1.1.4 misho 3076: SLJIT_ASSERT(common->first_line_end != 0);
3077: OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
1.1.1.3 misho 3078: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
1.1 misho 3079: }
3080:
3081: start = LABEL();
1.1.1.4 misho 3082: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1.1.1.2 misho 3083: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1 misho 3084:
/* oc = other case of first_char; it stays equal to first_char when the match
is case sensitive. */
1.1.1.2 misho 3085: oc = first_char;
3086: if (caseless)
3087: {
3088: oc = TABLE_GET(first_char, common->fcc, first_char);
3089: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3090: if (first_char > 127 && common->utf)
3091: oc = UCD_OTHERCASE(first_char);
3092: #endif
3093: }
3094: if (first_char == oc)
3095: found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
1.1 misho 3096: else
3097: {
1.1.1.2 misho 3098: bit = first_char ^ oc;
1.1.1.4 misho 3099: if (is_powerof2(bit))
1.1 misho 3100: {
/* The two cases differ in a single bit: force that bit on and do one
comparison. */
3101: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1.1.1.2 misho 3102: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
1.1 misho 3103: }
3104: else
3105: {
/* Otherwise compare against both values and OR the two results into TMP2. */
1.1.1.2 misho 3106: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
1.1.1.4 misho 3107: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1 misho 3108: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1.1.1.4 misho 3109: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3110: found = JUMP(SLJIT_C_NOT_ZERO);
3111: }
3112: }
3113:
/* No match at this position: step one code unit and retry. */
1.1.1.2 misho 3114: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3115: JUMPTO(SLJIT_JUMP, start);
3116: JUMPHERE(found);
1.1.1.4 misho 3117: JUMPHERE(quit);
1.1 misho 3118:
3119: if (firstline)
1.1.1.4 misho 3120: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
1.1 misho 3121: }
3122:
3123: static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3124: {
/* Emits code that moves STR_PTR forward to the position following the next
newline, unless STR_PTR is at (or before) the position loaded from
jit_arguments.str, in which case it is left unchanged.

With firstline, STR_END is replaced by the value stored in the
first_line_end slot while scanning; the real STR_END is saved in TMP3 on
entry and must be restored from TMP3 on every exit path. TMP1 and TMP2 are
used as scratch registers. */
3125: DEFINE_COMPILER;
3126: struct sljit_label *loop;
3127: struct sljit_jump *lastchar;
3128: struct sljit_jump *firstchar;
1.1.1.4 misho 3129: struct sljit_jump *quit;
1.1 misho 3130: struct sljit_jump *foundcr = NULL;
3131: struct sljit_jump *notfoundnl;
3132: jump_list *newline = NULL;
3133:
3134: if (firstline)
3135: {
1.1.1.4 misho 3136: SLJIT_ASSERT(common->first_line_end != 0);
3137: OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
1.1.1.3 misho 3138: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
1.1 misho 3139: }
3140:
3141: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3142: {
/* Fixed two-unit newline. Nothing is done when STR_PTR is already at the end
(lastchar) or not beyond jit_arguments.str (firstchar). */
3143: lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3144: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3145: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3146: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3147: firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3148:
/* Step STR_PTR back by one unit when it is at least two units past
jit_arguments.begin, so that the loop below (which pre-increments and reads
the units at -2 and -1) never reads before the beginning. */
1.1.1.2 misho 3149: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
1.1 misho 3150: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1.1.1.4 misho 3151: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3152: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3153: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
1.1.1.2 misho 3154: #endif
1.1 misho 3155: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3156:
3157: loop = LABEL();
1.1.1.2 misho 3158: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 3159: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1.1.1.2 misho 3160: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3161: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1.1 misho 3162: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3163: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3164:
1.1.1.4 misho 3165: JUMPHERE(quit);
1.1 misho 3166: JUMPHERE(firstchar);
3167: JUMPHERE(lastchar);
3168:
/* Restore the real STR_END from TMP3, where it was saved on entry. (This path
previously reloaded it from the POSSESSIVE0 local slot, which this function
never writes.) */
3169: if (firstline)
3170: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3171: return;
3172: }
3173:
/* All other newline types: step back one character, then read characters until
check_newlinechar reports a newline. Its "not a newline" jumps are bound to
the loop label. */
3174: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3175: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3176: firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3177: skip_char_back(common);
3178:
3179: loop = LABEL();
3180: read_char(common);
3181: lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3182: if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3183: foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3184: check_newlinechar(common, common->nltype, &newline, FALSE);
3185: set_jumps(newline, loop);
3186:
3187: if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3188: {
/* A CR was read: if the next unit is NL, step over it as well so that CRLF is
skipped as one newline. */
1.1.1.4 misho 3189: quit = JUMP(SLJIT_JUMP);
1.1 misho 3190: JUMPHERE(foundcr);
3191: notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1.1.1.2 misho 3192: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1 misho 3193: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1.1.1.4 misho 3194: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3195: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3196: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
1.1.1.2 misho 3197: #endif
1.1 misho 3198: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3199: JUMPHERE(notfoundnl);
1.1.1.4 misho 3200: JUMPHERE(quit);
1.1 misho 3201: }
3202: JUMPHERE(lastchar);
3203: JUMPHERE(firstchar);
3204:
3205: if (firstline)
1.1.1.4 misho 3206: OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
1.1 misho 3207: }
3208:
1.1.1.4 misho 3209: static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3210:
1.1 misho 3211: static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3212: {
/* Emits a loop that advances STR_PTR character by character until the code
unit at STR_PTR is a member of the start bitmap, or STR_PTR reaches STR_END.

start_bits is the address of a 32 byte (256 bit) bitmap, passed as an integer.

With firstline, STR_END is replaced by the value stored in the first_line_end
slot during the loop; the real STR_END is kept in RETURN_ADDR here, because
TMP3 is needed in UTF mode to preserve the code unit that was read. */
3213: DEFINE_COMPILER;
3214: struct sljit_label *start;
1.1.1.4 misho 3215: struct sljit_jump *quit;
3216: struct sljit_jump *found = NULL;
3217: jump_list *matches = NULL;
3218: pcre_uint8 inverted_start_bits[32];
3219: int i;
1.1.1.2 misho 3220: #ifndef COMPILE_PCRE8
3221: struct sljit_jump *jump;
3222: #endif
1.1 misho 3223:
/* check_class_ranges is handed the inverted bitmap together with the
"matches" list; presumably its jumps are taken for characters outside the
given bitmap, so inverting makes them fire for members of the start set.
NOTE(review): confirm against check_ranges. */
1.1.1.4 misho 3224: for (i = 0; i < 32; ++i)
3225: inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3226:
1.1 misho 3227: if (firstline)
3228: {
1.1.1.4 misho 3229: SLJIT_ASSERT(common->first_line_end != 0);
3230: OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
1.1.1.3 misho 3231: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
1.1 misho 3232: }
3233:
3234: start = LABEL();
1.1.1.4 misho 3235: quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1.1.1.2 misho 3236: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3237: #ifdef SUPPORT_UTF
3238: if (common->utf)
1.1 misho 3239: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3240: #endif
1.1.1.4 misho 3241:
3242: if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3243: {
/* Fallback when the set cannot be expressed as a few ranges: test the bit in
the original bitmap. In the 16/32 bit libraries values above 255 are clamped
to 255 before indexing. */
1.1.1.2 misho 3244: #ifndef COMPILE_PCRE8
1.1.1.4 misho 3245: jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3246: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3247: JUMPHERE(jump);
1.1.1.2 misho 3248: #endif
/* TMP2 = 1 << (c & 7), TMP1 = bitmap byte at index c >> 3. */
1.1.1.4 misho 3249: OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3250: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3251: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3252: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3253: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3254: found = JUMP(SLJIT_C_NOT_ZERO);
3255: }
1.1 misho 3256:
/* Not a start character: restore the unit saved in TMP3 (UTF mode) and step to
the next character, including the extra units of a multi-unit UTF-8 or
UTF-16 character. */
1.1.1.2 misho 3257: #ifdef SUPPORT_UTF
3258: if (common->utf)
1.1 misho 3259: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3260: #endif
1.1.1.2 misho 3261: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 3262: #ifdef SUPPORT_UTF
3263: #if defined COMPILE_PCRE8
1.1.1.2 misho 3264: if (common->utf)
1.1 misho 3265: {
3266: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1.1.1.4 misho 3267: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
1.1.1.2 misho 3268: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3269: }
1.1.1.4 misho 3270: #elif defined COMPILE_PCRE16
1.1.1.2 misho 3271: if (common->utf)
3272: {
3273: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3274: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3275: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1.1.1.4 misho 3276: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1.1.2 misho 3277: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1.1 misho 3278: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3279: }
1.1.1.4 misho 3280: #endif /* COMPILE_PCRE[8|16] */
3281: #endif /* SUPPORT_UTF */
1.1 misho 3282: JUMPTO(SLJIT_JUMP, start);
/* Exit points: bitmap hit, range-check hits, and end of subject. */
1.1.1.4 misho 3283: if (found != NULL)
3284: JUMPHERE(found);
3285: if (matches != NULL)
3286: set_jumps(matches, LABEL());
3287: JUMPHERE(quit);
1.1 misho 3288:
3289: if (firstline)
1.1.1.4 misho 3290: OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
1.1 misho 3291: }
3292:
1.1.1.2 misho 3293: static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
1.1 misho 3294: {
/* Emits code that checks whether req_char (or, when caseless is set, its other
case) occurs in the subject between STR_PTR and STR_END.

The local slot common->req_char_ptr caches the position of the last hit. The
search is skipped when STR_PTR is still below the cached position
(alreadyfound), and also when STR_PTR + REQ_BYTE_MAX is below STR_END
(toolong). When has_firstchar is set the scan starts one code unit after
STR_PTR.

Returns the jump that is taken when the end of the subject is reached without
a hit; the caller is responsible for binding it. STR_PTR itself is not
modified; TMP1 and TMP2 are used as scratch registers. */
3295: DEFINE_COMPILER;
3296: struct sljit_label *loop;
3297: struct sljit_jump *toolong;
3298: struct sljit_jump *alreadyfound;
3299: struct sljit_jump *found;
3300: struct sljit_jump *foundoc = NULL;
3301: struct sljit_jump *notfound;
1.1.1.4 misho 3302: pcre_uint32 oc, bit;
1.1 misho 3303:
1.1.1.3 misho 3304: SLJIT_ASSERT(common->req_char_ptr != 0);
3305: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
1.1 misho 3306: OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3307: toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3308: alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3309:
/* TMP1 is the scan pointer. */
1.1.1.2 misho 3310: if (has_firstchar)
3311: OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3312: else
3313: OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3314:
3315: loop = LABEL();
3316: notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3317:
1.1.1.2 misho 3318: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc = other case of req_char; it stays equal to req_char when the match is
case sensitive. */
3319: oc = req_char;
3320: if (caseless)
3321: {
3322: oc = TABLE_GET(req_char, common->fcc, req_char);
3323: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3324: if (req_char > 127 && common->utf)
3325: oc = UCD_OTHERCASE(req_char);
3326: #endif
3327: }
3328: if (req_char == oc)
3329: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
1.1 misho 3330: else
3331: {
1.1.1.2 misho 3332: bit = req_char ^ oc;
1.1.1.4 misho 3333: if (is_powerof2(bit))
1.1 misho 3334: {
/* Single differing bit: force it on and compare once. */
3335: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
1.1.1.2 misho 3336: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
1.1 misho 3337: }
3338: else
3339: {
/* Otherwise two separate comparisons are emitted. */
1.1.1.2 misho 3340: found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
1.1 misho 3341: foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3342: }
3343: }
1.1.1.2 misho 3344: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3345: JUMPTO(SLJIT_JUMP, loop);
3346:
3347: JUMPHERE(found);
3348: if (foundoc)
3349: JUMPHERE(foundoc);
/* Remember where the character was found. */
1.1.1.3 misho 3350: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
1.1 misho 3351: JUMPHERE(alreadyfound);
3352: JUMPHERE(toolong);
3353: return notfound;
3354: }
3355:
3356: static void do_revertframes(compiler_common *common)
3357: {
/* Fast-call routine that walks a list of entries starting at STACK_TOP and
writes saved values back into local slots. TMP1 is the read pointer (STACK_TOP
itself is not modified), TMP3 holds the base address obtained with
GET_LOCAL_BASE and TMP2 holds the first word of the current entry, which
selects the entry kind:
  > 0 : offset from the base; the next two words are copied to that address
        and the one after it (entry size: three words);
  = 0 : end of the list, return to the caller;
  < 0 : negated offset from the base; the next single word is copied to that
        address (entry size: two words). */
3358: DEFINE_COMPILER;
3359: struct sljit_jump *jump;
3360: struct sljit_label *mainloop;
3361:
1.1.1.3 misho 3362: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 3363: OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
1.1.1.3 misho 3364: GET_LOCAL_BASE(TMP3, 0, 0);
1.1 misho 3365:
3366: /* Drop frames until we reach STACK_TOP. */
3367: mainloop = LABEL();
3368: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* The signed flags set here are tested twice: by the jump right below and by
the SLJIT_C_SIG_LESS jump after JUMPHERE. No flag-modifying operation is
emitted on the path between the two. */
1.1.1.4 misho 3369: OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3370: jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3371:
1.1.1.3 misho 3372: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
1.1.1.4 misho 3373: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3374: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3375: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
1.1 misho 3376: JUMPTO(SLJIT_JUMP, mainloop);
3377:
3378: JUMPHERE(jump);
1.1.1.4 misho 3379: jump = JUMP(SLJIT_C_SIG_LESS);
1.1 misho 3380: /* End of dropping frames. */
3381: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3382:
3383: JUMPHERE(jump);
1.1.1.4 misho 3384: OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3385: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3386: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3387: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
1.1 misho 3388: JUMPTO(SLJIT_JUMP, mainloop);
3389: }
3390:
3391: static void check_wordboundary(compiler_common *common)
3392: {
/* Fast-call routine that tests for a word boundary at STR_PTR. The return
address is kept in the LOCALS0 slot rather than in a register.

The "is a word character" value (0 or 1) of the previous character is stored
in LOCALS1 and that of the character at STR_PTR is computed in TMP2. Either
value stays 0 when there is no such character (STR_PTR not beyond
jit_arguments.begin, or check_str_end takes its jump). The final XOR sets the
equality flags for the caller: a non-zero result means the two sides differ. */
3393: DEFINE_COMPILER;
1.1.1.3 misho 3394: struct sljit_jump *skipread;
1.1.1.4 misho 3395: jump_list *skipread_list = NULL;
1.1.1.2 misho 3396: #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
1.1 misho 3397: struct sljit_jump *jump;
3398: #endif
3399:
3400: SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3401:
1.1.1.3 misho 3402: sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1.1 misho 3403: /* Get type of the previous char, and put it to LOCALS1. */
3404: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3405: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3406: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
1.1.1.3 misho 3407: skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; presumably read_char leaves
STR_PTR at its original position afterwards. NOTE(review): confirm. */
1.1 misho 3408: skip_char_back(common);
1.1.1.3 misho 3409: check_start_used_ptr(common);
1.1 misho 3410: read_char(common);
3411:
3412: /* Testing char type. */
3413: #ifdef SUPPORT_UCP
1.1.1.2 misho 3414: if (common->use_ucp)
1.1 misho 3415: {
/* Unicode properties: a word character is an underscore, or a character whose
chartype lies in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No (each range is tested
with one subtraction followed by an unsigned comparison). */
3416: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3417: jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3418: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3419: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3420: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
1.1.1.4 misho 3421: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 3422: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3423: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
1.1.1.4 misho 3424: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 3425: JUMPHERE(jump);
3426: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3427: }
3428: else
3429: #endif
3430: {
/* Table lookup: characters above 255 skip the lookup, leaving LOCALS1 at 0;
otherwise bit 4 (ctype_word, see the compile-time assert above) of the ctypes
entry is extracted. */
1.1.1.2 misho 3431: #ifndef COMPILE_PCRE8
3432: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3433: #elif defined SUPPORT_UTF
1.1 misho 3434: /* Here LOCALS1 has already been zeroed. */
3435: jump = NULL;
1.1.1.2 misho 3436: if (common->utf)
1.1 misho 3437: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
1.1.1.2 misho 3438: #endif /* COMPILE_PCRE8 */
1.1 misho 3439: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3440: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3441: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3442: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1.1.1.2 misho 3443: #ifndef COMPILE_PCRE8
3444: JUMPHERE(jump);
3445: #elif defined SUPPORT_UTF
1.1 misho 3446: if (jump != NULL)
3447: JUMPHERE(jump);
1.1.1.2 misho 3448: #endif /* COMPILE_PCRE8 */
1.1 misho 3449: }
1.1.1.3 misho 3450: JUMPHERE(skipread);
1.1 misho 3451:
/* Now classify the character at STR_PTR without consuming it; TMP2 defaults
to 0 for the end-of-subject case. */
3452: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
1.1.1.4 misho 3453: check_str_end(common, &skipread_list);
1.1 misho 3454: peek_char(common);
3455:
3456: /* Testing char type. This is a code duplication. */
3457: #ifdef SUPPORT_UCP
1.1.1.2 misho 3458: if (common->use_ucp)
1.1 misho 3459: {
3460: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3461: jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3462: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3463: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3464: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
1.1.1.4 misho 3465: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 3466: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3467: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
1.1.1.4 misho 3468: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 3469: JUMPHERE(jump);
3470: }
3471: else
3472: #endif
3473: {
1.1.1.2 misho 3474: #ifndef COMPILE_PCRE8
3475: /* TMP2 may be destroyed by peek_char. */
3476: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3477: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3478: #elif defined SUPPORT_UTF
1.1 misho 3479: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3480: jump = NULL;
1.1.1.2 misho 3481: if (common->utf)
1.1 misho 3482: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3483: #endif
3484: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3485: OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3486: OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1.1.1.2 misho 3487: #ifndef COMPILE_PCRE8
3488: JUMPHERE(jump);
3489: #elif defined SUPPORT_UTF
1.1 misho 3490: if (jump != NULL)
3491: JUMPHERE(jump);
1.1.1.2 misho 3492: #endif /* COMPILE_PCRE8 */
1.1 misho 3493: }
1.1.1.4 misho 3494: set_jumps(skipread_list, LABEL());
3495:
/* Flags := (TMP2 XOR LOCALS1); the result itself is discarded. */
3496: OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3497: sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3498: }
3499:
3500: /*
3501: range format:
3502:
3503: ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3504: ranges[1] = first bit (0 or 1)
3505: ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3506: */
3507:
3508: static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3509: {
/* Emits a compact membership test for a character set given as a list of bit
change positions: ranges[0] is the number of positions, ranges[1] the value
of the first bit and ranges[2...] the positions where the bit flips.

The character is expected in TMP1; when readch is set, read_char is emitted
first. The emitted jumps are added to *backtracks. TMP1 may be modified by
the emitted test.

Returns TRUE when code was emitted. Returns FALSE, emitting nothing, when the
list is invalid (ranges[0] < 0) or its shape is not one of the supported
cases: one position, two positions, or four positions forming either two
single characters or two equally long ranges whose start distance is a power
of two. */
3510: DEFINE_COMPILER;
3511: struct sljit_jump *jump;
3512:
3513: if (ranges[0] < 0)
3514: return FALSE;
3515:
3516: switch(ranges[0])
3517: {
3518: case 1:
/* One threshold: a single comparison against ranges[2]. */
3519: if (readch)
3520: read_char(common);
3521: add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3522: return TRUE;
3523:
3524: case 2:
/* One interval [ranges[2], ranges[3]): subtract the lower bound and compare
with the interval length. */
3525: if (readch)
3526: read_char(common);
3527: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3528: add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3529: return TRUE;
3530:
3531: case 4:
/* Two single characters: two equality tests. */
3532: if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3533: {
3534: if (readch)
3535: read_char(common);
3536: if (ranges[1] != 0)
3537: {
3538: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3539: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3540: }
3541: else
3542: {
3543: jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3544: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3545: JUMPHERE(jump);
3546: }
3547: return TRUE;
3548: }
/* Two equally long intervals whose start distance is a power of two: OR-ing
that distance into the character maps the first interval onto the second,
after which a single interval test is enough. */
3549: if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3550: {
3551: if (readch)
3552: read_char(common);
3553: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3554: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3555: add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3556: return TRUE;
3557: }
3558: return FALSE;
3559:
3560: default:
3561: return FALSE;
3562: }
3563: }
3564:
3565: static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3566: {
/* Builds the range list for the set of characters 0..255 whose ctypes entry
has the given flag bit set.

On success ranges[0] is the number of recorded positions, ranges[1] is 1 if
character 0 has the flag (0 otherwise) and ranges[2...] are the characters at
which membership changes. If more than MAX_RANGE_SIZE positions would be
needed, the function returns early and ranges[0] is left at -1 (invalid). */
3567: int i, bit, length;
3568: const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3569:
3570: bit = ctypes[0] & flag;
3571: ranges[0] = -1;
3572: ranges[1] = bit != 0 ? 1 : 0;
3573: length = 0;
3574:
/* "bit" is either 0 or flag; XOR-ing with flag toggles it at every change. */
3575: for (i = 1; i < 256; i++)
3576: if ((ctypes[i] & flag) != bit)
3577: {
3578: if (length >= MAX_RANGE_SIZE)
3579: return;
3580: ranges[2 + length] = i;
3581: length++;
3582: bit ^= flag;
3583: }
3584:
/* If the set is still "on" at 255, close the last range at 256. */
3585: if (bit != 0)
3586: {
3587: if (length >= MAX_RANGE_SIZE)
3588: return;
3589: ranges[2 + length] = 256;
3590: length++;
3591: }
3592: ranges[0] = length;
3593: }
1.1 misho 3594:
1.1.1.4 misho 3595: static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3596: {
/* Converts a 256 bit class bitmap (32 bytes, bit i of the map is bit (i & 7)
of bits[i >> 3]) into a range list and passes it to check_ranges with readch
set to FALSE, i.e. the character is expected to be in TMP1 already.

Returns FALSE when the bitmap needs more than MAX_RANGE_SIZE change
positions; otherwise returns whatever check_ranges returns.

nclass decides how the tail of the list is closed: a terminating position of
256 is appended when the last bit is 0 and nclass is set, or when the last bit
is 1 and nclass is not set. */
3597: int ranges[2 + MAX_RANGE_SIZE];
3598: pcre_uint8 bit, cbit, all;
3599: int i, byte, length = 0;
3600:
3601: bit = bits[0] & 0x1;
3602: ranges[1] = bit;
3603: /* Can be 0 or 255. */
3604: all = -bit;
3605:
3606: for (i = 0; i < 256; )
3607: {
/* Fast path: at a byte boundary, a byte whose eight bits all equal the
current bit value contains no change and is skipped as a whole. */
3608: byte = i >> 3;
3609: if ((i & 0x7) == 0 && bits[byte] == all)
3610: i += 8;
3611: else
3612: {
3613: cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3614: if (cbit != bit)
3615: {
3616: if (length >= MAX_RANGE_SIZE)
3617: return FALSE;
3618: ranges[2 + length] = i;
3619: length++;
3620: bit = cbit;
3621: all = -cbit;
3622: }
3623: i++;
3624: }
3625: }
3626:
3627: if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3628: {
3629: if (length >= MAX_RANGE_SIZE)
3630: return FALSE;
3631: ranges[2 + length] = 256;
3632: length++;
3633: }
3634: ranges[0] = length;
3635:
3636: return check_ranges(common, ranges, backtracks, FALSE);
1.1 misho 3637: }
3638:
3639: static void check_anynewline(compiler_common *common)
3640: {
3641: /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Emitted as a routine bracketed by sljit_emit_fast_enter / fast_return.
TMP1 is modified as well: 0x0a is subtracted from it and, on the wide
character path, its lowest bit is set. The individual comparison results are
OR-ed into TMP2; the last OP_FLAGS is issued with SLJIT_SET_E.
NOTE(review): presumably the caller branches on those flags - confirm at the
call sites. */
3642: DEFINE_COMPILER;
3643:
1.1.1.3 misho 3644: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 3645:
/* After rebasing to 0x0a, a value <= 3 corresponds to 0x0a..0x0d. */
3646: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3647: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
1.1.1.4 misho 3648: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* 0x85 test; its result is consumed by the next OP_FLAGS that is emitted. */
1.1 misho 3649: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
1.1.1.4 misho 3650: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* In the 8 bit library the extra test is emitted only when common->utf is set. */
1.1.1.2 misho 3651: #ifdef COMPILE_PCRE8
3652: if (common->utf)
1.1 misho 3653: {
1.1.1.2 misho 3654: #endif
1.1.1.4 misho 3655: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 folds 0x2028 onto 0x2029, so one comparison covers both. */
1.1 misho 3656: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3657: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
1.1.1.2 misho 3658: #ifdef COMPILE_PCRE8
1.1 misho 3659: }
3660: #endif
1.1.1.4 misho 3661: #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3662: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3663: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3664: }
3665:
3666: static void check_hspace(compiler_common *common)
3667: {
3668: /* Check whether TMP1 contains one of the horizontal space code points tested below. TMP2 destroyed. */
/* Emitted as a routine bracketed by sljit_emit_fast_enter / fast_return.
Always tested: 0x09, 0x20, 0xa0. On the wide character path also: 0x1680,
0x180e, 0x2000..0x200A, 0x202f, 0x205f and 0x3000. That path subtracts 0x2000
from TMP1, so TMP1 is not preserved there. The comparison results are OR-ed
into TMP2 and the last OP_FLAGS is issued with SLJIT_SET_E.
NOTE(review): presumably the caller branches on those flags - confirm at the
call sites. */
3669: DEFINE_COMPILER;
3670:
1.1.1.3 misho 3671: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 3672:
3673: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
1.1.1.4 misho 3674: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1 misho 3675: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1.1.1.4 misho 3676: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* The 0xa0 result is consumed by the next OP_FLAGS that is emitted. */
1.1 misho 3677: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
1.1.1.4 misho 3678: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* In the 8 bit library the extra tests are emitted only when common->utf is set. */
1.1.1.2 misho 3679: #ifdef COMPILE_PCRE8
3680: if (common->utf)
1.1 misho 3681: {
1.1.1.2 misho 3682: #endif
1.1.1.4 misho 3683: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3684: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
1.1.1.4 misho 3685: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3686: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
1.1.1.4 misho 3687: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 to 0x2000; the remaining constants are relative to that base. */
1.1 misho 3688: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3689: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
1.1.1.4 misho 3690: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 3691: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
1.1.1.4 misho 3692: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3693: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
1.1.1.4 misho 3694: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3695: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
1.1.1.2 misho 3696: #ifdef COMPILE_PCRE8
1.1 misho 3697: }
3698: #endif
1.1.1.4 misho 3699: #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3700: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3701:
3702: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3703: }
3704:
3705: static void check_vspace(compiler_common *common)
3706: {
3707: /* Check whether TMP1 contains one of the vertical space code points tested below. TMP2 destroyed. */
/* Emitted as a routine bracketed by sljit_emit_fast_enter / fast_return.
Always tested: 0x0a..0x0d and 0x85. On the wide character path also 0x2028
and 0x2029. TMP1 is modified too: 0x0a is subtracted from it and, on the wide
character path, its lowest bit is set. The comparison results are OR-ed into
TMP2 and the last OP_FLAGS is issued with SLJIT_SET_E.
NOTE(review): presumably the caller branches on those flags - confirm at the
call sites. */
3708: DEFINE_COMPILER;
3709:
1.1.1.3 misho 3710: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 3711:
/* After rebasing to 0x0a, a value <= 3 corresponds to 0x0a..0x0d. */
3712: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3713: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
1.1.1.4 misho 3714: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 3715: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
1.1.1.4 misho 3716: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* In the 8 bit library the extra test is emitted only when common->utf is set. */
1.1.1.2 misho 3717: #ifdef COMPILE_PCRE8
3718: if (common->utf)
1.1 misho 3719: {
1.1.1.2 misho 3720: #endif
/* NOTE(review): unlike check_anynewline, this intermediate OP_FLAGS also
carries SLJIT_SET_E; a later SLJIT_SUB | SLJIT_SET_E follows before the
flags are consumed, so it looks redundant - confirm. */
1.1.1.4 misho 3721: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 folds 0x2028 onto 0x2029, so one comparison covers both. */
1.1 misho 3722: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3723: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
1.1.1.2 misho 3724: #ifdef COMPILE_PCRE8
1.1 misho 3725: }
3726: #endif
1.1.1.4 misho 3727: #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3728: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 3729:
3730: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3731: }
3732:
3733: #define CHAR1 STR_END
3734: #define CHAR2 STACK_TOP
3735:
/* Emits a routine (sljit_emit_fast_enter / fast_return) that compares two code
unit sequences exactly.

As used below: TMP1 addresses one sequence, STR_PTR is first moved back by
TMP2 so that it addresses the other, and TMP2 is the remaining length, counted
down in IN_UCHARS(1) steps. CHAR1 (STR_END) and CHAR2 (STACK_TOP) are borrowed
as scratch registers: their values are parked in TMP3 and in the LOCALS0 slot
and restored before returning.

NOTE(review): the length test happens after the first comparison, so TMP2 is
presumably never zero on entry. How the caller tells match from mismatch is
not visible here (presumably TMP2 / the flags left by the loop) - confirm at
the call sites. */
3736: static void do_casefulcmp(compiler_common *common)
3737: {
3738: DEFINE_COMPILER;
3739: struct sljit_jump *jump;
3740: struct sljit_label *label;
3741:
1.1.1.3 misho 3742: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 3743: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3744: OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3745: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back one unit because the loads in the loop use an
offset of IN_UCHARS(1); STR_PTR gets the unit added back after the loop.
NOTE(review): MOVU_UCHAR presumably also advances the base register - confirm. */
1.1.1.2 misho 3746: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3747: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3748:
3749: label = LABEL();
1.1.1.2 misho 3750: OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3751: OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first differing unit. */
1.1 misho 3752: jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
1.1.1.2 misho 3753: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3754: JUMPTO(SLJIT_C_NOT_ZERO, label);
3755:
3756: JUMPHERE(jump);
1.1.1.2 misho 3757: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3758: OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3759: OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3760: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3761: }
3762:
3763: #define LCC_TABLE STACK_LIMIT
3764:
/* Emits a routine (sljit_emit_fast_enter / fast_return) that compares two code
unit sequences after mapping each unit through the common->lcc table.

The register usage mirrors do_casefulcmp: TMP1 addresses one sequence, STR_PTR
(moved back by TMP2) the other, and TMP2 is the remaining length. In addition
LCC_TABLE (STACK_LIMIT) is borrowed to hold the table address. The borrowed
registers are saved in TMP3 (LCC_TABLE), LOCALS0 (CHAR1) and LOCALS1 (CHAR2)
and restored before returning.

NOTE(review): the length test happens after the first comparison, so TMP2 is
presumably never zero on entry; the way the result is reported to the caller
is not visible here - confirm at the call sites. */
3765: static void do_caselesscmp(compiler_common *common)
3766: {
3767: DEFINE_COMPILER;
3768: struct sljit_jump *jump;
3769: struct sljit_label *label;
3770:
1.1.1.3 misho 3771: sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1.1 misho 3772: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3773:
3774: OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3775: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3776: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3777: OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back one unit because the loads in the loop use an
offset of IN_UCHARS(1); STR_PTR gets the unit added back after the loop. */
1.1.1.2 misho 3778: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3779: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3780:
3781: label = LABEL();
1.1.1.2 misho 3782: OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3783: OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Outside the 8 bit library, units above 255 bypass the table lookup and are
compared unchanged. */
3784: #ifndef COMPILE_PCRE8
3785: jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3786: #endif
1.1 misho 3787: OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
1.1.1.2 misho 3788: #ifndef COMPILE_PCRE8
3789: JUMPHERE(jump);
3790: jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3791: #endif
1.1 misho 3792: OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
1.1.1.2 misho 3793: #ifndef COMPILE_PCRE8
3794: JUMPHERE(jump);
3795: #endif
/* Leave the loop at the first pair that still differs after mapping. */
1.1 misho 3796: jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
1.1.1.2 misho 3797: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3798: JUMPTO(SLJIT_C_NOT_ZERO, label);
3799:
3800: JUMPHERE(jump);
1.1.1.2 misho 3801: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 3802: OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3803: OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3804: OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3805: sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3806: }
3807:
3808: #undef LCC_TABLE
3809: #undef CHAR1
3810: #undef CHAR2
3811:
1.1.1.2 misho 3812: #if defined SUPPORT_UTF && defined SUPPORT_UCP
1.1 misho 3813:
/* Plain C helper: caseless comparison of the characters in [src1, end1)
against the text that starts at args->uchar_ptr and is bounded by args->end.

Returns
  (pcre_uchar*)1  if the second text is exhausted before [src1, end1) is
  NULL            if a character pair does not match
  src2            the position reached in the second text, if all of
                  [src1, end1) matched

Two characters match when they are equal, when c1 equals c2 plus the
other_case value of c2's UCD record, or when c1 occurs in the caseless set
selected by that record's caseset field. */
1.1.1.4 misho 3814: static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
1.1 misho 3815: {
3816: /* This function would be ineffective to do in JIT level. */
1.1.1.4 misho 3817: pcre_uint32 c1, c2;
1.1.1.3 misho 3818: const pcre_uchar *src2 = args->uchar_ptr;
1.1.1.2 misho 3819: const pcre_uchar *end2 = args->end;
1.1.1.4 misho 3820: const ucd_record *ur;
3821: const pcre_uint32 *pp;
1.1 misho 3822:
3823: while (src1 < end1)
3824: {
3825: if (src2 >= end2)
1.1.1.3 misho 3826: return (pcre_uchar*)1;
1.1 misho 3827: GETCHARINC(c1, src1);
3828: GETCHARINC(c2, src2);
1.1.1.4 misho 3829: ur = GET_UCD(c2);
3830: if (c1 != c2 && c1 != c2 + ur->other_case)
3831: {
3832: pp = PRIV(ucd_caseless_sets) + ur->caseset;
/* The scan stops with a mismatch at the first entry greater than c1.
NOTE(review): this relies on each set being sorted ascending and ending in a
value larger than any character - confirm against the table definition. */
3833: for (;;)
3834: {
3835: if (c1 < *pp) return NULL;
3836: if (c1 == *pp++) break;
3837: }
3838: }
1.1 misho 3839: }
3840: return src2;
3841: }
3842:
1.1.1.2 misho 3843: #endif /* SUPPORT_UTF && SUPPORT_UCP */
1.1 misho 3844:
/* Emits the comparison code for one pattern character at `cc` (several code
units when common->utf is set and the first unit has extra length) and returns
`cc` advanced past that character. Mismatch jumps are added to `backtracks`.

The subject is read at negative offsets from STR_PTR: each unit is loaded from
STR_PTR - context->length, and context->length is reduced by IN_UCHARS(1) per
unit. `context` carries state between consecutive calls:
  length     remaining distance back from STR_PTR
  sourcereg  register that receives the next load; -1 before the first call,
             afterwards it alternates between TMP1 and TMP2
  ucharptr, c, oc  (unaligned-read builds only) units gathered so far, so that
             several of them can be checked with a single wider comparison

When `caseless` is set and the character has an other case, the differing bit
is OR-ed into the loaded value and into the constant it is compared with. */
1.1.1.2 misho 3845: static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
1.1.1.3 misho 3846: compare_context* context, jump_list **backtracks)
1.1 misho 3847: {
3848: DEFINE_COMPILER;
3849: unsigned int othercasebit = 0;
1.1.1.2 misho 3850: pcre_uchar *othercasechar = NULL;
3851: #ifdef SUPPORT_UTF
3852: int utflength;
1.1 misho 3853: #endif
3854:
3855: if (caseless && char_has_othercase(common, cc))
3856: {
3857: othercasebit = char_get_othercase_bit(common, cc);
3858: SLJIT_ASSERT(othercasebit);
3859: /* Extracting bit difference info. */
/* The value returned above packs two things: the upper part selects the code
unit (relative to cc) that carries the differing bit, the lower part is the
bit itself. Both are split apart here. */
1.1.1.4 misho 3860: #if defined COMPILE_PCRE8
1.1.1.2 misho 3861: othercasechar = cc + (othercasebit >> 8);
1.1 misho 3862: othercasebit &= 0xff;
1.1.1.4 misho 3863: #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3864: /* Note that this code only handles characters in the BMP. If there
3865: ever are characters outside the BMP whose othercase differs in only one
3866: bit from itself (there currently are none), this code will need to be
3867: revised for COMPILE_PCRE32. */
1.1.1.2 misho 3868: othercasechar = cc + (othercasebit >> 9);
3869: if ((othercasebit & 0x100) != 0)
3870: othercasebit = (othercasebit & 0xff) << 8;
3871: else
3872: othercasebit &= 0xff;
1.1.1.4 misho 3873: #endif /* COMPILE_PCRE[8|16|32] */
1.1 misho 3874: }
3875:
/* First call for this context: preload TMP1 with the widest load available,
then direct the next load to TMP2. */
3876: if (context->sourcereg == -1)
3877: {
1.1.1.4 misho 3878: #if defined COMPILE_PCRE8
1.1 misho 3879: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3880: if (context->length >= 4)
3881: OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3882: else if (context->length >= 2)
1.1.1.2 misho 3883: OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
1.1 misho 3884: else
3885: #endif
3886: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
1.1.1.4 misho 3887: #elif defined COMPILE_PCRE16
1.1.1.2 misho 3888: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3889: if (context->length >= 4)
3890: OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3891: else
3892: #endif
1.1.1.4 misho 3893: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3894: #elif defined COMPILE_PCRE32
3895: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3896: #endif /* COMPILE_PCRE[8|16|32] */
1.1 misho 3897: context->sourcereg = TMP2;
3898: }
3899:
/* With UTF support the body below is wrapped in a do/while that runs once per
code unit of the character; without it the body runs exactly once. */
1.1.1.2 misho 3900: #ifdef SUPPORT_UTF
3901: utflength = 1;
3902: if (common->utf && HAS_EXTRALEN(*cc))
3903: utflength += GET_EXTRALEN(*cc);
1.1 misho 3904:
3905: do
3906: {
3907: #endif
3908:
1.1.1.2 misho 3909: context->length -= IN_UCHARS(1);
1.1.1.4 misho 3910: #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
1.1 misho 3911:
3912: /* Unaligned read is supported. */
/* Gather the expected unit (c) and its case mask (oc); the comparison itself
is emitted only once a full group has been collected or the data ends. */
1.1.1.2 misho 3913: if (othercasebit != 0 && othercasechar == cc)
1.1 misho 3914: {
1.1.1.2 misho 3915: context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3916: context->oc.asuchars[context->ucharptr] = othercasebit;
1.1 misho 3917: }
3918: else
3919: {
1.1.1.2 misho 3920: context->c.asuchars[context->ucharptr] = *cc;
3921: context->oc.asuchars[context->ucharptr] = 0;
1.1 misho 3922: }
1.1.1.2 misho 3923: context->ucharptr++;
1.1 misho 3924:
1.1.1.4 misho 3925: #if defined COMPILE_PCRE8
1.1.1.2 misho 3926: if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3927: #else
3928: if (context->ucharptr >= 2 || context->length == 0)
3929: #endif
1.1 misho 3930: {
/* Issue the load for the next group into the current sourcereg, then swap
registers so that the comparison below uses the previously loaded value. */
3931: if (context->length >= 4)
3932: OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3933: else if (context->length >= 2)
1.1.1.2 misho 3934: OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
1.1.1.4 misho 3935: #if defined COMPILE_PCRE8
1.1 misho 3936: else if (context->length >= 1)
3937: OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
1.1.1.4 misho 3938: #endif /* COMPILE_PCRE8 */
1.1 misho 3939: context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3940:
/* The case labels are unit counts: 4 or 2 bytes divided by the unit size. */
1.1.1.2 misho 3941: switch(context->ucharptr)
1.1 misho 3942: {
1.1.1.2 misho 3943: case 4 / sizeof(pcre_uchar):
1.1 misho 3944: if (context->oc.asint != 0)
3945: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
1.1.1.3 misho 3946: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
1.1 misho 3947: break;
3948:
1.1.1.2 misho 3949: case 2 / sizeof(pcre_uchar):
3950: if (context->oc.asushort != 0)
3951: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
1.1.1.3 misho 3952: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
1.1 misho 3953: break;
3954:
1.1.1.2 misho 3955: #ifdef COMPILE_PCRE8
1.1 misho 3956: case 1:
3957: if (context->oc.asbyte != 0)
3958: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
1.1.1.3 misho 3959: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
1.1 misho 3960: break;
1.1.1.2 misho 3961: #endif
1.1 misho 3962:
3963: default:
3964: SLJIT_ASSERT_STOP();
3965: break;
3966: }
1.1.1.2 misho 3967: context->ucharptr = 0;
1.1 misho 3968: }
3969:
3970: #else
3971:
1.1.1.4 misho 3972: /* Unaligned read is unsupported or in 32 bit mode. */
/* One unit at a time: load the next unit into sourcereg, swap registers, and
compare the previously loaded unit. */
3973: if (context->length >= 1)
3974: OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3975:
1.1 misho 3976: context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3977:
1.1.1.2 misho 3978: if (othercasebit != 0 && othercasechar == cc)
1.1 misho 3979: {
3980: OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
1.1.1.3 misho 3981: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
1.1 misho 3982: }
3983: else
1.1.1.3 misho 3984: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
1.1 misho 3985:
3986: #endif
3987:
3988: cc++;
1.1.1.2 misho 3989: #ifdef SUPPORT_UTF
3990: utflength--;
1.1 misho 3991: }
1.1.1.2 misho 3992: while (utflength > 0);
1.1 misho 3993: #endif
3994:
3995: return cc;
3996: }
3997:
1.1.1.2 misho 3998: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 3999:
/* Rebases the value held in `typereg` so that it is relative to `value`
instead of the current `typeoffset`: emits a SLJIT_SUB (value larger) or a
SLJIT_ADD (value smaller) of the difference, then records the new offset.
Relies on `typereg` and `typeoffset` being in scope at the point of use.
Caution: the macro expands to two statements (an if plus an assignment) and
evaluates `value` several times, so it must not be the sole body of an
unbraced if/else and the argument should have no side effects. */
4000: #define SET_TYPE_OFFSET(value) \
4001: if ((value) != typeoffset) \
4002: { \
4003: if ((value) > typeoffset) \
4004: OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4005: else \
4006: OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4007: } \
4008: typeoffset = (value);
4009:
/* Rebases the character held in TMP1 so that it is relative to `value`
instead of the current `charoffset`: emits a SLJIT_SUB (value larger) or a
SLJIT_ADD (value smaller) of the difference, then records the new offset.
Relies on `charoffset` being in scope at the point of use.
Caution: the macro expands to two statements (an if plus an assignment) and
evaluates `value` several times, so it must not be the sole body of an
unbraced if/else and the argument should have no side effects. */
4010: #define SET_CHAR_OFFSET(value) \
4011: if ((value) != charoffset) \
4012: { \
4013: if ((value) > charoffset) \
4014: OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4015: else \
4016: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4017: } \
4018: charoffset = (value);
4019:
1.1.1.4 misho 4020: static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
1.1 misho 4021: {
4022: DEFINE_COMPILER;
4023: jump_list *found = NULL;
1.1.1.3 misho 4024: jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
1.1.1.4 misho 4025: pcre_int32 c, charoffset;
1.1 misho 4026: struct sljit_jump *jump = NULL;
1.1.1.2 misho 4027: pcre_uchar *ccbegin;
1.1.1.4 misho 4028: int compares, invertcmp, numberofcmps;
1.1.1.5 ! misho 4029:
1.1 misho 4030: #ifdef SUPPORT_UCP
4031: BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4032: BOOL charsaved = FALSE;
4033: int typereg = TMP1, scriptreg = TMP1;
1.1.1.5 ! misho 4034: const pcre_uint32 *other_cases;
1.1.1.4 misho 4035: pcre_int32 typeoffset;
1.1 misho 4036: #endif
4037:
1.1.1.4 misho 4038: /* Although SUPPORT_UTF must be defined, we are
4039: not necessary in utf mode even in 8 bit mode. */
1.1.1.3 misho 4040: detect_partial_match(common, backtracks);
1.1 misho 4041: read_char(common);
4042:
4043: if ((*cc++ & XCL_MAP) != 0)
4044: {
4045: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1.1.1.2 misho 4046: #ifndef COMPILE_PCRE8
4047: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4048: #elif defined SUPPORT_UTF
4049: if (common->utf)
1.1 misho 4050: jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
1.1.1.2 misho 4051: #endif
1.1 misho 4052:
1.1.1.4 misho 4053: if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4054: {
4055: OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4056: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4057: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4058: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4059: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4060: add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4061: }
1.1 misho 4062:
1.1.1.2 misho 4063: #ifndef COMPILE_PCRE8
4064: JUMPHERE(jump);
4065: #elif defined SUPPORT_UTF
4066: if (common->utf)
1.1 misho 4067: JUMPHERE(jump);
1.1.1.2 misho 4068: #endif
1.1 misho 4069: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4070: #ifdef SUPPORT_UCP
4071: charsaved = TRUE;
4072: #endif
1.1.1.2 misho 4073: cc += 32 / sizeof(pcre_uchar);
1.1 misho 4074: }
4075:
4076: /* Scanning the necessary info. */
4077: ccbegin = cc;
4078: compares = 0;
4079: while (*cc != XCL_END)
4080: {
4081: compares++;
4082: if (*cc == XCL_SINGLE)
4083: {
4084: cc += 2;
1.1.1.2 misho 4085: #ifdef SUPPORT_UTF
4086: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1.1 misho 4087: #endif
4088: #ifdef SUPPORT_UCP
4089: needschar = TRUE;
4090: #endif
4091: }
4092: else if (*cc == XCL_RANGE)
4093: {
4094: cc += 2;
1.1.1.2 misho 4095: #ifdef SUPPORT_UTF
4096: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1.1 misho 4097: #endif
4098: cc++;
1.1.1.2 misho 4099: #ifdef SUPPORT_UTF
4100: if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1.1 misho 4101: #endif
4102: #ifdef SUPPORT_UCP
4103: needschar = TRUE;
4104: #endif
4105: }
4106: #ifdef SUPPORT_UCP
4107: else
4108: {
4109: SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4110: cc++;
4111: switch(*cc)
4112: {
4113: case PT_ANY:
4114: break;
4115:
4116: case PT_LAMP:
4117: case PT_GC:
4118: case PT_PC:
4119: case PT_ALNUM:
4120: needstype = TRUE;
4121: break;
4122:
4123: case PT_SC:
4124: needsscript = TRUE;
4125: break;
4126:
4127: case PT_SPACE:
4128: case PT_PXSPACE:
4129: case PT_WORD:
1.1.1.5 ! misho 4130: case PT_PXGRAPH:
! 4131: case PT_PXPRINT:
! 4132: case PT_PXPUNCT:
1.1 misho 4133: needstype = TRUE;
4134: needschar = TRUE;
4135: break;
4136:
1.1.1.4 misho 4137: case PT_CLIST:
4138: case PT_UCNC:
4139: needschar = TRUE;
4140: break;
4141:
1.1 misho 4142: default:
4143: SLJIT_ASSERT_STOP();
4144: break;
4145: }
4146: cc += 2;
4147: }
4148: #endif
4149: }
4150:
4151: #ifdef SUPPORT_UCP
4152: /* Simple register allocation. TMP1 is preferred if possible. */
4153: if (needstype || needsscript)
4154: {
4155: if (needschar && !charsaved)
4156: OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4157: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4158: if (needschar)
4159: {
4160: if (needstype)
4161: {
4162: OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4163: typereg = RETURN_ADDR;
4164: }
4165:
4166: if (needsscript)
4167: scriptreg = TMP3;
4168: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4169: }
4170: else if (needstype && needsscript)
4171: scriptreg = TMP3;
4172: /* In all other cases only one of them was specified, and that can goes to TMP1. */
4173:
4174: if (needsscript)
4175: {
4176: if (scriptreg == TMP1)
4177: {
1.1.1.4 misho 4178: OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
1.1 misho 4179: OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4180: }
4181: else
4182: {
4183: OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
1.1.1.4 misho 4184: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
1.1 misho 4185: OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4186: }
4187: }
4188: }
4189: #endif
4190:
4191: /* Generating code. */
4192: cc = ccbegin;
4193: charoffset = 0;
4194: numberofcmps = 0;
4195: #ifdef SUPPORT_UCP
4196: typeoffset = 0;
4197: #endif
4198:
4199: while (*cc != XCL_END)
4200: {
4201: compares--;
1.1.1.3 misho 4202: invertcmp = (compares == 0 && list != backtracks);
1.1 misho 4203: jump = NULL;
4204:
4205: if (*cc == XCL_SINGLE)
4206: {
4207: cc ++;
1.1.1.2 misho 4208: #ifdef SUPPORT_UTF
4209: if (common->utf)
1.1 misho 4210: {
4211: GETCHARINC(c, cc);
4212: }
4213: else
4214: #endif
4215: c = *cc++;
4216:
4217: if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4218: {
4219: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
1.1.1.4 misho 4220: OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 4221: numberofcmps++;
4222: }
4223: else if (numberofcmps > 0)
4224: {
4225: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
1.1.1.4 misho 4226: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 4227: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4228: numberofcmps = 0;
4229: }
4230: else
4231: {
4232: jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4233: numberofcmps = 0;
4234: }
4235: }
4236: else if (*cc == XCL_RANGE)
4237: {
4238: cc ++;
1.1.1.2 misho 4239: #ifdef SUPPORT_UTF
4240: if (common->utf)
1.1 misho 4241: {
4242: GETCHARINC(c, cc);
4243: }
4244: else
4245: #endif
4246: c = *cc++;
4247: SET_CHAR_OFFSET(c);
1.1.1.2 misho 4248: #ifdef SUPPORT_UTF
4249: if (common->utf)
1.1 misho 4250: {
4251: GETCHARINC(c, cc);
4252: }
4253: else
4254: #endif
4255: c = *cc++;
4256: if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4257: {
4258: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
1.1.1.4 misho 4259: OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 4260: numberofcmps++;
4261: }
4262: else if (numberofcmps > 0)
4263: {
4264: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
1.1.1.4 misho 4265: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 4266: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4267: numberofcmps = 0;
4268: }
4269: else
4270: {
4271: jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4272: numberofcmps = 0;
4273: }
4274: }
4275: #ifdef SUPPORT_UCP
4276: else
4277: {
4278: if (*cc == XCL_NOTPROP)
4279: invertcmp ^= 0x1;
4280: cc++;
4281: switch(*cc)
4282: {
4283: case PT_ANY:
1.1.1.3 misho 4284: if (list != backtracks)
1.1 misho 4285: {
4286: if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4287: continue;
4288: }
4289: else if (cc[-1] == XCL_NOTPROP)
4290: continue;
4291: jump = JUMP(SLJIT_JUMP);
4292: break;
4293:
4294: case PT_LAMP:
4295: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
1.1.1.4 misho 4296: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1 misho 4297: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
1.1.1.4 misho 4298: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 4299: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
1.1.1.4 misho 4300: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 4301: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4302: break;
4303:
4304: case PT_GC:
1.1.1.2 misho 4305: c = PRIV(ucp_typerange)[(int)cc[1] * 2];
1.1 misho 4306: SET_TYPE_OFFSET(c);
1.1.1.2 misho 4307: jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
1.1 misho 4308: break;
4309:
4310: case PT_PC:
4311: jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4312: break;
4313:
4314: case PT_SC:
4315: jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4316: break;
4317:
4318: case PT_SPACE:
4319: case PT_PXSPACE:
4320: SET_CHAR_OFFSET(9);
1.1.1.5 ! misho 4321: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
1.1.1.4 misho 4322: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
1.1.1.5 ! misho 4323:
! 4324: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
! 4325: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
! 4326:
! 4327: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
! 4328: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1 misho 4329:
4330: SET_TYPE_OFFSET(ucp_Zl);
4331: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
1.1.1.4 misho 4332: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 4333: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4334: break;
4335:
4336: case PT_WORD:
4337: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
1.1.1.4 misho 4338: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4339: /* Fall through. */
1.1 misho 4340:
4341: case PT_ALNUM:
4342: SET_TYPE_OFFSET(ucp_Ll);
4343: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
1.1.1.4 misho 4344: OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
1.1 misho 4345: SET_TYPE_OFFSET(ucp_Nd);
4346: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
1.1.1.4 misho 4347: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4348: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4349: break;
4350:
4351: case PT_CLIST:
4352: other_cases = PRIV(ucd_caseless_sets) + cc[1];
4353:
4354: /* At least three characters are required.
4355: Otherwise this case would be handled by the normal code path. */
4356: SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4357: SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4358:
4359: /* Optimizing character pairs, if their difference is power of 2. */
4360: if (is_powerof2(other_cases[1] ^ other_cases[0]))
4361: {
4362: if (charoffset == 0)
4363: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4364: else
4365: {
4366: OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4367: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4368: }
4369: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4370: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4371: other_cases += 2;
4372: }
4373: else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4374: {
4375: if (charoffset == 0)
4376: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4377: else
4378: {
4379: OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4380: OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4381: }
4382: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4383: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4384:
4385: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4386: OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4387:
4388: other_cases += 3;
4389: }
4390: else
4391: {
4392: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4393: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4394: }
4395:
4396: while (*other_cases != NOTACHAR)
4397: {
4398: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4399: OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4400: }
4401: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4402: break;
4403:
4404: case PT_UCNC:
4405: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4406: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4407: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4408: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4409: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4410: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4411:
4412: SET_CHAR_OFFSET(0xa0);
4413: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4414: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4415: SET_CHAR_OFFSET(0);
4416: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4417: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1.1 misho 4418: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4419: break;
1.1.1.5 ! misho 4420:
! 4421: case PT_PXGRAPH:
! 4422: /* C and Z groups are the farthest two groups. */
! 4423: SET_TYPE_OFFSET(ucp_Ll);
! 4424: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
! 4425: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
! 4426:
! 4427: jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
! 4428:
! 4429: /* In case of ucp_Cf, we overwrite the result. */
! 4430: SET_CHAR_OFFSET(0x2066);
! 4431: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
! 4432: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
! 4433:
! 4434: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
! 4435: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
! 4436:
! 4437: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
! 4438: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
! 4439:
! 4440: JUMPHERE(jump);
! 4441: jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
! 4442: break;
! 4443:
! 4444: case PT_PXPRINT:
! 4445: /* C and Z groups are the farthest two groups. */
! 4446: SET_TYPE_OFFSET(ucp_Ll);
! 4447: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
! 4448: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
! 4449:
! 4450: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
! 4451: OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
! 4452:
! 4453: jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
! 4454:
! 4455: /* In case of ucp_Cf, we overwrite the result. */
! 4456: SET_CHAR_OFFSET(0x2066);
! 4457: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
! 4458: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
! 4459:
! 4460: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
! 4461: OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
! 4462:
! 4463: JUMPHERE(jump);
! 4464: jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
! 4465: break;
! 4466:
! 4467: case PT_PXPUNCT:
! 4468: SET_TYPE_OFFSET(ucp_Sc);
! 4469: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
! 4470: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
! 4471:
! 4472: SET_CHAR_OFFSET(0);
! 4473: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
! 4474: OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
! 4475:
! 4476: SET_TYPE_OFFSET(ucp_Pc);
! 4477: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
! 4478: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
! 4479: jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
! 4480: break;
1.1 misho 4481: }
4482: cc += 2;
4483: }
4484: #endif
4485:
4486: if (jump != NULL)
1.1.1.3 misho 4487: add_jump(compiler, compares > 0 ? list : backtracks, jump);
1.1 misho 4488: }
4489:
4490: if (found != NULL)
4491: set_jumps(found, LABEL());
4492: }
4493:
4494: #undef SET_TYPE_OFFSET
4495: #undef SET_CHAR_OFFSET
4496:
4497: #endif
4498:
/* Emits the matching-path code for one single-item opcode.
   common      the shared compiler state
   type        the opcode to compile
   cc          pattern pointer to the operands of the opcode (callers in this
               file pass the position just after the opcode)
   backtracks  list that collects every jump taken when the item fails
   Returns the pattern pointer after the operands consumed by this opcode;
   opcodes without operands return cc unchanged. An opcode that is not
   handled by the switch reaches SLJIT_ASSERT_STOP(). */
1.1.1.4 misho 4499: static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
1.1 misho 4500: {
4501: DEFINE_COMPILER;
4502: int length;
4503: unsigned int c, oc, bit;
4504: compare_context context;
4505: struct sljit_jump *jump[4];
1.1.1.4 misho 4506: jump_list *end_list;
1.1.1.2 misho 4507: #ifdef SUPPORT_UTF
1.1 misho 4508: struct sljit_label *label;
4509: #ifdef SUPPORT_UCP
1.1.1.2 misho 4510: pcre_uchar propdata[5];
1.1 misho 4511: #endif
4512: #endif
4513:
4514: switch(type)
4515: {
4516: case OP_SOD:
/* Fail unless STR_PTR equals the "begin" field of jit_arguments. */
4517: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4518: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1.1.1.3 misho 4519: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
1.1 misho 4520: return cc;
4521:
4522: case OP_SOM:
/* Fail unless STR_PTR equals the "str" field of jit_arguments. */
4523: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4524: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1.1.1.3 misho 4525: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
1.1 misho 4526: return cc;
4527:
4528: case OP_NOT_WORD_BOUNDARY:
4529: case OP_WORD_BOUNDARY:
/* The shared wordboundary routine is reached through a fast call; the
   conditional jump that follows presumably tests the flag it leaves. */
4530: add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
1.1.1.3 misho 4531: add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1.1 misho 4532: return cc;
4533:
4534: case OP_NOT_DIGIT:
4535: case OP_DIGIT:
1.1.1.4 misho 4536: /* Digits are usually 0-9, so it is worth optimizing them. */
/* A value of -2 in digits[0] means the ranges have not been collected yet. */
4537: if (common->digits[0] == -2)
4538: get_ctype_ranges(common, ctype_digit, common->digits);
1.1.1.3 misho 4539: detect_partial_match(common, backtracks);
1.1.1.4 misho 4540: /* Flip the starting bit in the negative case. */
4541: if (type == OP_NOT_DIGIT)
4542: common->digits[1] ^= 1;
/* Fall back to the ctype table lookup when check_ranges() emits nothing. */
4543: if (!check_ranges(common, common->digits, backtracks, TRUE))
4544: {
4545: read_char8_type(common);
4546: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4547: add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4548: }
/* Undo the flip above so that common->digits is left unchanged. */
4549: if (type == OP_NOT_DIGIT)
4550: common->digits[1] ^= 1;
1.1 misho 4551: return cc;
4552:
4553: case OP_NOT_WHITESPACE:
4554: case OP_WHITESPACE:
/* Test the ctype_space bit of the value produced by read_char8_type(). */
1.1.1.3 misho 4555: detect_partial_match(common, backtracks);
1.1 misho 4556: read_char8_type(common);
4557: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
1.1.1.3 misho 4558: add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
1.1 misho 4559: return cc;
4560:
4561: case OP_NOT_WORDCHAR:
4562: case OP_WORDCHAR:
/* Test the ctype_word bit of the value produced by read_char8_type(). */
1.1.1.3 misho 4563: detect_partial_match(common, backtracks);
1.1 misho 4564: read_char8_type(common);
4565: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
1.1.1.3 misho 4566: add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
1.1 misho 4567: return cc;
4568:
4569: case OP_ANY:
1.1.1.3 misho 4570: detect_partial_match(common, backtracks);
1.1 misho 4571: read_char(common);
/* Fixed newline wider than 255: the first unit is compared with bits 8-15
   of common->newline and the following unit with bits 0-7. The item fails
   only when both match; outside hard-partial mode, reaching STR_END after
   the first unit counts as a match. */
4572: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4573: {
4574: jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
1.1.1.4 misho 4575: end_list = NULL;
1.1.1.3 misho 4576: if (common->mode != JIT_PARTIAL_HARD_COMPILE)
1.1.1.4 misho 4577: add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1.1.1.3 misho 4578: else
1.1.1.4 misho 4579: check_str_end(common, &end_list);
1.1.1.3 misho 4580:
1.1.1.2 misho 4581: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1.1.3 misho 4582: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
1.1.1.4 misho 4583: set_jumps(end_list, LABEL());
1.1 misho 4584: JUMPHERE(jump[0]);
4585: }
4586: else
1.1.1.3 misho 4587: check_newlinechar(common, common->nltype, backtracks, TRUE);
1.1 misho 4588: return cc;
4589:
4590: case OP_ALLANY:
1.1.1.3 misho 4591: detect_partial_match(common, backtracks);
1.1.1.2 misho 4592: #ifdef SUPPORT_UTF
4593: if (common->utf)
1.1 misho 4594: {
/* Step over the first code unit, then over any further units it announces:
   8-bit builds add the utf8_table4 entry for lead values >= 0xc0, 16-bit
   builds add one more unit when (unit & 0xfc00) == 0xd800. */
1.1.1.2 misho 4595: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4596: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 4597: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4598: #if defined COMPILE_PCRE8
1.1 misho 4599: jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1.1.1.4 misho 4600: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
1.1 misho 4601: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1.1.1.4 misho 4602: #elif defined COMPILE_PCRE16
1.1.1.2 misho 4603: jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4604: OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4605: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1.1.1.4 misho 4606: OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
1.1.1.2 misho 4607: OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4608: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1.1.1.4 misho 4609: #endif
1.1 misho 4610: JUMPHERE(jump[0]);
1.1.1.4 misho 4611: #endif /* COMPILE_PCRE[8|16] */
1.1 misho 4612: return cc;
4613: }
4614: #endif
1.1.1.2 misho 4615: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 4616: return cc;
4617:
4618: case OP_ANYBYTE:
/* Always advances by exactly one code unit. */
1.1.1.3 misho 4619: detect_partial_match(common, backtracks);
1.1.1.2 misho 4620: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 4621: return cc;
4622:
1.1.1.2 misho 4623: #ifdef SUPPORT_UTF
1.1 misho 4624: #ifdef SUPPORT_UCP
4625: case OP_NOTPROP:
4626: case OP_PROP:
/* Wrap the two operand units in a five-element list (leading 0, XCL_PROP or
   XCL_NOTPROP, the operands, XCL_END) and reuse the XCLASS compiler. */
4627: propdata[0] = 0;
4628: propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4629: propdata[2] = cc[0];
4630: propdata[3] = cc[1];
4631: propdata[4] = XCL_END;
1.1.1.4 misho 4632: compile_xclass_matchingpath(common, propdata, backtracks);
1.1 misho 4633: return cc + 2;
4634: #endif
4635: #endif
4636:
4637: case OP_ANYNL:
1.1.1.3 misho 4638: detect_partial_match(common, backtracks);
1.1 misho 4639: read_char(common);
/* jump[0]: not CR, handled by check_newlinechar() below. After a CR the item
   already matches; a directly following NL is consumed as well. */
4640: jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1.1.1.3 misho 4641: /* We don't need to handle soft partial matching case. */
1.1.1.4 misho 4642: end_list = NULL;
1.1.1.3 misho 4643: if (common->mode != JIT_PARTIAL_HARD_COMPILE)
1.1.1.4 misho 4644: add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1.1.1.3 misho 4645: else
1.1.1.4 misho 4646: check_str_end(common, &end_list);
1.1.1.2 misho 4647: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1.1.1.4 misho 4648: jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
1.1.1.2 misho 4649: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.4 misho 4650: jump[2] = JUMP(SLJIT_JUMP);
1.1 misho 4651: JUMPHERE(jump[0]);
1.1.1.3 misho 4652: check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
1.1.1.4 misho 4653: set_jumps(end_list, LABEL());
1.1 misho 4654: JUMPHERE(jump[1]);
4655: JUMPHERE(jump[2]);
4656: return cc;
4657:
4658: case OP_NOT_HSPACE:
4659: case OP_HSPACE:
/* The shared hspace routine is reached through a fast call; the conditional
   jump that follows presumably tests the flag it leaves. */
1.1.1.3 misho 4660: detect_partial_match(common, backtracks);
1.1 misho 4661: read_char(common);
4662: add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
1.1.1.3 misho 4663: add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1.1 misho 4664: return cc;
4665:
4666: case OP_NOT_VSPACE:
4667: case OP_VSPACE:
/* Same scheme as the horizontal space case, using the vspace routine. */
1.1.1.3 misho 4668: detect_partial_match(common, backtracks);
1.1 misho 4669: read_char(common);
4670: add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
1.1.1.3 misho 4671: add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1.1 misho 4672: return cc;
4673:
4674: #ifdef SUPPORT_UCP
4675: case OP_EXTUNI:
1.1.1.3 misho 4676: detect_partial_match(common, backtracks);
1.1 misho 4677: read_char(common);
4678: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 4679: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4680: /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the gbprop byte of the
   previous character (presumably TMP2 is the record index left by getucd). */
4681: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4682: OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1.1 misho 4683:
/* Loop: remember the current position in TMP3, read the next character and
   continue while the ucp_gbtable word of the previous gbprop has the bit
   (1 << next gbprop) set. */
4684: label = LABEL();
4685: jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4686: OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4687: read_char(common);
4688: add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 4689: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4690: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4691:
4692: OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4693: OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4694: OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4695: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4696: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4697: JUMPTO(SLJIT_C_NOT_ZERO, label);
1.1 misho 4698:
/* The last character read did not pass the table test: step back to its start. */
4699: OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4700: JUMPHERE(jump[0]);
1.1.1.4 misho 4701: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4702:
1.1.1.3 misho 4703: if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4704: {
4705: jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4706: /* Since we successfully read a char above, partial matching must occur. */
4707: check_partial(common, TRUE);
4708: JUMPHERE(jump[0]);
4709: }
1.1 misho 4710: return cc;
4711: #endif
4712:
4713: case OP_EODN:
1.1.1.3 misho 4714: /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at or past STR_END; no newline test is needed. */
1.1 misho 4715: jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4716: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4717: {
/* Two-unit fixed newline: it must end exactly at STR_END. */
1.1.1.2 misho 4718: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4719: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1.1.1.3 misho 4720: if (common->mode == JIT_COMPILE)
4721: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4722: else
4723: {
/* Partial modes: when STR_PTR + 2 units != STR_END, check_partial() is
   reached only if STR_PTR + 2 units is above STR_END and the current unit
   equals the first newline unit; every such path ends in a backtrack. */
4724: jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4725: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
1.1.1.4 misho 4726: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
1.1.1.3 misho 4727: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
1.1.1.4 misho 4728: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
1.1.1.3 misho 4729: add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4730: check_partial(common, TRUE);
4731: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4732: JUMPHERE(jump[1]);
4733: }
1.1.1.2 misho 4734: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1.1.1.3 misho 4735: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4736: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
1.1 misho 4737: }
4738: else if (common->nltype == NLTYPE_FIXED)
4739: {
/* One-unit fixed newline: it must be the last unit before STR_END. */
1.1.1.2 misho 4740: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4741: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1.1.1.3 misho 4742: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4743: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1.1 misho 4744: }
4745: else
4746: {
/* Other newline types. A CR is accepted when it is the last unit (jump[2])
   or when CR NL ends exactly at STR_END (jump[3]). */
1.1.1.2 misho 4747: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1.1 misho 4748: jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1.1.1.2 misho 4749: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1.1 misho 4750: OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4751: jump[2] = JUMP(SLJIT_C_GREATER);
1.1.1.3 misho 4752: add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
1.1.1.2 misho 4753: /* Equal. */
4754: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1.1 misho 4755: jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
1.1.1.3 misho 4756: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
1.1 misho 4757:
4758: JUMPHERE(jump[1]);
4759: if (common->nltype == NLTYPE_ANYCRLF)
4760: {
4761: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1.1.3 misho 4762: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4763: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
1.1 misho 4764: }
4765: else
4766: {
/* STR_PTR is parked in LOCALS1 while one character is read and tested
   by the anynewline routine, then restored. */
4767: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4768: read_char(common);
1.1.1.3 misho 4769: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
1.1 misho 4770: add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1.1.1.3 misho 4771: add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
1.1 misho 4772: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4773: }
4774: JUMPHERE(jump[2]);
4775: JUMPHERE(jump[3]);
4776: }
4777: JUMPHERE(jump[0]);
1.1.1.3 misho 4778: check_partial(common, FALSE);
1.1 misho 4779: return cc;
4780:
4781: case OP_EOD:
/* Fail while STR_PTR is still below STR_END. */
1.1.1.3 misho 4782: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4783: check_partial(common, FALSE);
1.1 misho 4784: return cc;
4785:
4786: case OP_CIRC:
/* Fail when STR_PTR is above the "begin" argument or "notbol" is non-zero. */
4787: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4788: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
1.1.1.3 misho 4789: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
1.1 misho 4790: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
1.1.1.3 misho 4791: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
1.1 misho 4792: return cc;
4793:
4794: case OP_CIRCM:
/* At "begin" only the notbol flag is tested (then jump[0] skips the rest).
   Elsewhere STR_PTR must be below STR_END and preceded by a newline. */
4795: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4796: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4797: jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4798: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
1.1.1.3 misho 4799: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
1.1 misho 4800: jump[0] = JUMP(SLJIT_JUMP);
4801: JUMPHERE(jump[1]);
4802:
1.1.1.3 misho 4803: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1.1 misho 4804: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4805: {
/* Two-unit newline: both preceding units must lie at or after "begin". */
1.1.1.2 misho 4806: OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1.1.1.3 misho 4807: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
1.1.1.2 misho 4808: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4809: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1.1.1.3 misho 4810: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4811: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
1.1 misho 4812: }
4813: else
4814: {
/* Step back one character and read it again to test it as a newline. */
4815: skip_char_back(common);
4816: read_char(common);
1.1.1.3 misho 4817: check_newlinechar(common, common->nltype, backtracks, FALSE);
1.1 misho 4818: }
4819: JUMPHERE(jump[0]);
4820: return cc;
4821:
4822: case OP_DOLL:
/* Fail when "noteol" is set; otherwise behave like OP_EODN, or like a
   plain end-of-subject test when common->endonly is set. */
4823: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4824: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
1.1.1.3 misho 4825: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
1.1 misho 4826:
4827: if (!common->endonly)
1.1.1.4 misho 4828: compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
1.1 misho 4829: else
1.1.1.3 misho 4830: {
4831: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4832: check_partial(common, FALSE);
4833: }
1.1 misho 4834: return cc;
4835:
4836: case OP_DOLLM:
/* At STR_END only the noteol flag is tested (then jump[0] skips the rest).
   Before STR_END a newline must start at STR_PTR. */
4837: jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4838: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4839: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
1.1.1.3 misho 4840: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4841: check_partial(common, FALSE);
1.1 misho 4842: jump[0] = JUMP(SLJIT_JUMP);
4843: JUMPHERE(jump[1]);
4844:
4845: if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4846: {
1.1.1.2 misho 4847: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4848: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1.1.1.3 misho 4849: if (common->mode == JIT_COMPILE)
4850: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4851: else
4852: {
4853: jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4854: /* STR_PTR = STR_END - IN_UCHARS(1) */
4855: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4856: check_partial(common, TRUE);
4857: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4858: JUMPHERE(jump[1]);
4859: }
4860:
1.1.1.2 misho 4861: OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1.1.1.3 misho 4862: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4863: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
1.1 misho 4864: }
4865: else
4866: {
4867: peek_char(common);
1.1.1.3 misho 4868: check_newlinechar(common, common->nltype, backtracks, FALSE);
1.1 misho 4869: }
4870: JUMPHERE(jump[0]);
4871: return cc;
4872:
4873: case OP_CHAR:
4874: case OP_CHARI:
4875: length = 1;
1.1.1.2 misho 4876: #ifdef SUPPORT_UTF
4877: if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
1.1 misho 4878: #endif
/* Fast path (non-partial mode only): the character is compared unit by unit
   by byte_sequence_compare(). It is not used for a caseless character whose
   char_get_othercase_bit() result is 0. */
1.1.1.3 misho 4879: if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
1.1 misho 4880: {
1.1.1.2 misho 4881: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
1.1.1.3 misho 4882: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
1.1 misho 4883:
1.1.1.2 misho 4884: context.length = IN_UCHARS(length);
1.1 misho 4885: context.sourcereg = -1;
4886: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
1.1.1.2 misho 4887: context.ucharptr = 0;
1.1 misho 4888: #endif
1.1.1.3 misho 4889: return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
1.1 misho 4890: }
/* Slow path: read one character and compare it with the pattern character. */
1.1.1.3 misho 4891: detect_partial_match(common, backtracks);
1.1 misho 4892: read_char(common);
1.1.1.2 misho 4893: #ifdef SUPPORT_UTF
4894: if (common->utf)
1.1 misho 4895: {
4896: GETCHAR(c, cc);
4897: }
4898: else
4899: #endif
4900: c = *cc;
1.1.1.3 misho 4901: if (type == OP_CHAR || !char_has_othercase(common, cc))
4902: {
4903: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4904: return cc + length;
4905: }
4906: oc = char_othercase(common, c);
4907: bit = c ^ oc;
/* When the two cases differ in one bit, set that bit and compare once. */
1.1.1.4 misho 4908: if (is_powerof2(bit))
1.1.1.3 misho 4909: {
4910: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4911: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4912: return cc + length;
4913: }
/* Otherwise compare against both cases and fail when neither is equal. */
1.1 misho 4914: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
1.1.1.4 misho 4915: OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4916: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4917: OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
1.1.1.3 misho 4918: add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
1.1 misho 4919: return cc + length;
4920:
4921: case OP_NOT:
4922: case OP_NOTI:
1.1.1.3 misho 4923: detect_partial_match(common, backtracks);
1.1 misho 4924: length = 1;
1.1.1.2 misho 4925: #ifdef SUPPORT_UTF
4926: if (common->utf)
1.1 misho 4927: {
1.1.1.2 misho 4928: #ifdef COMPILE_PCRE8
4929: c = *cc;
/* UTF-8 with a pattern character below 128: only the first subject byte is
   compared, then the rest of the subject character is skipped. */
4930: if (c < 128)
1.1 misho 4931: {
4932: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4933: if (type == OP_NOT || !char_has_othercase(common, cc))
1.1.1.3 misho 4934: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
1.1 misho 4935: else
4936: {
4937: /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4938: OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
1.1.1.3 misho 4939: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
1.1 misho 4940: }
4941: /* Skip the variable-length character. */
1.1.1.2 misho 4942: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1.1 misho 4943: jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1.1.1.4 misho 4944: OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
1.1 misho 4945: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4946: JUMPHERE(jump[0]);
1.1.1.2 misho 4947: return cc + 1;
1.1 misho 4948: }
4949: else
1.1.1.2 misho 4950: #endif /* COMPILE_PCRE8 */
4951: {
4952: GETCHARLEN(c, cc, length);
1.1 misho 4953: read_char(common);
1.1.1.2 misho 4954: }
1.1 misho 4955: }
4956: else
1.1.1.2 misho 4957: #endif /* SUPPORT_UTF */
1.1 misho 4958: {
1.1.1.2 misho 4959: read_char(common);
1.1 misho 4960: c = *cc;
4961: }
4962:
/* Generic path: fail when the character read equals c (or its other case). */
4963: if (type == OP_NOT || !char_has_othercase(common, cc))
1.1.1.3 misho 4964: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
1.1 misho 4965: else
4966: {
4967: oc = char_othercase(common, c);
4968: bit = c ^ oc;
1.1.1.4 misho 4969: if (is_powerof2(bit))
1.1 misho 4970: {
4971: OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
1.1.1.3 misho 4972: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
1.1 misho 4973: }
4974: else
4975: {
1.1.1.3 misho 4976: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4977: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
1.1 misho 4978: }
4979: }
1.1.1.3 misho 4980: return cc + length;
1.1 misho 4981:
4982: case OP_CLASS:
4983: case OP_NCLASS:
1.1.1.3 misho 4984: detect_partial_match(common, backtracks);
1.1 misho 4985: read_char(common);
/* Try the range-based form first; the bitmap operand occupies 32 bytes. */
1.1.1.4 misho 4986: if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4987: return cc + 32 / sizeof(pcre_uchar);
4988:
1.1.1.2 misho 4989: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 4990: jump[0] = NULL;
1.1.1.2 misho 4991: #ifdef COMPILE_PCRE8
4992: /* This check only affects 8 bit mode. In other modes, we
4993: always need to compare the value with 255. */
4994: if (common->utf)
4995: #endif /* COMPILE_PCRE8 */
1.1 misho 4996: {
/* Characters above 255 are outside the bitmap: OP_CLASS fails on them,
   OP_NCLASS accepts them by jumping past the bit test. */
4997: jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4998: if (type == OP_CLASS)
4999: {
1.1.1.3 misho 5000: add_jump(compiler, backtracks, jump[0]);
1.1 misho 5001: jump[0] = NULL;
5002: }
5003: }
1.1.1.2 misho 5004: #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap test: byte cc[ch >> 3], bit (ch & 7); fail when the bit is clear. */
1.1 misho 5005: OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5006: OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
1.1.1.4 misho 5007: OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
1.1 misho 5008: OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5009: OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
1.1.1.3 misho 5010: add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
1.1.1.2 misho 5011: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 5012: if (jump[0] != NULL)
5013: JUMPHERE(jump[0]);
1.1.1.2 misho 5014: #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5015: return cc + 32 / sizeof(pcre_uchar);
1.1 misho 5016:
1.1.1.4 misho 5017: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1 misho 5018: case OP_XCLASS:
/* The operand starts with a LINK_SIZE length field, read here with GET(cc, 0). */
1.1.1.4 misho 5019: compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
1.1 misho 5020: return cc + GET(cc, 0) - 1;
5021: #endif
5022:
5023: case OP_REVERSE:
/* Move STR_PTR back by "length" characters; fail if that would pass "begin". */
5024: length = GET(cc, 0);
1.1.1.3 misho 5025: if (length == 0)
5026: return cc + LINK_SIZE;
1.1 misho 5027: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1.1.1.2 misho 5028: #ifdef SUPPORT_UTF
5029: if (common->utf)
1.1 misho 5030: {
/* UTF: step back one character at a time, TMP2 counts the remaining steps. */
1.1.1.2 misho 5031: OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1.1 misho 5032: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5033: label = LABEL();
1.1.1.3 misho 5034: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
1.1 misho 5035: skip_char_back(common);
5036: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5037: JUMPTO(SLJIT_C_NOT_ZERO, label);
5038: }
1.1.1.3 misho 5039: else
1.1 misho 5040: #endif
1.1.1.3 misho 5041: {
/* Non-UTF: one code unit per character, so a single subtraction suffices. */
5042: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5043: OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5044: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5045: }
5046: check_start_used_ptr(common);
1.1 misho 5047: return cc + LINK_SIZE;
5048: }
/* No case matched: the opcode is not supported by this function. */
5049: SLJIT_ASSERT_STOP();
5050: return cc;
5051: }
5052:
/* Emits the matching-path code for the item at cc, merging a run of
   consecutive OP_CHAR / OP_CHARI items into one fixed-length comparison.
   common      the shared compiler state
   cc          pattern pointer to the opcode of the first item
   ccend       scanning for further items stops at this pattern position
   backtracks  list that collects every jump taken when the match fails
   Returns the pattern pointer after the last item that was compiled. */
1.1.1.4 misho 5053: static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
1.1 misho 5054: {
5055: /* This function consumes at least one input character. */
5056: /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5057: DEFINE_COMPILER;
1.1.1.2 misho 5058: pcre_uchar *ccbegin = cc;
1.1 misho 5059: compare_context context;
5060: int size;
5061:
/* First pass: measure the run. "size" is the number of code units of the
   current character, or 0 when the item cannot be part of the run. */
5062: context.length = 0;
5063: do
5064: {
5065: if (cc >= ccend)
5066: break;
5067:
5068: if (*cc == OP_CHAR)
5069: {
5070: size = 1;
1.1.1.2 misho 5071: #ifdef SUPPORT_UTF
5072: if (common->utf && HAS_EXTRALEN(cc[1]))
5073: size += GET_EXTRALEN(cc[1]);
1.1 misho 5074: #endif
5075: }
5076: else if (*cc == OP_CHARI)
5077: {
5078: size = 1;
/* A caseless character for which char_get_othercase_bit() returns 0 ends
   the run (size = 0). */
1.1.1.2 misho 5079: #ifdef SUPPORT_UTF
5080: if (common->utf)
1.1 misho 5081: {
5082: if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5083: size = 0;
1.1.1.2 misho 5084: else if (HAS_EXTRALEN(cc[1]))
5085: size += GET_EXTRALEN(cc[1]);
1.1 misho 5086: }
5087: else
5088: #endif
5089: if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5090: size = 0;
5091: }
5092: else
5093: size = 0;
5094:
5095: cc += 1 + size;
1.1.1.2 misho 5096: context.length += IN_UCHARS(size);
1.1 misho 5097: }
/* Stop growing the run once its accumulated length exceeds 128. */
5098: while (size > 0 && context.length <= 128);
5099:
/* Second pass: rewind to the first item and emit the comparisons. */
5100: cc = ccbegin;
5101: if (context.length > 0)
5102: {
5103: /* We have a fixed-length byte sequence. */
/* One length check covers the whole run: advance STR_PTR past it first and
   fail if that exceeds STR_END. */
5104: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
1.1.1.3 misho 5105: add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
1.1 misho 5106:
5107: context.sourcereg = -1;
5108: #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
1.1.1.2 misho 5109: context.ucharptr = 0;
1.1 misho 5110: #endif
/* The loop ends when context.length reaches 0 (presumably it is decreased
   by byte_sequence_compare() for every character it handles). */
1.1.1.3 misho 5111: do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
1.1 misho 5112: return cc;
5113: }
5114:
5115: /* A non-fixed length character will be checked if length == 0. */
1.1.1.4 misho 5116: return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
1.1 misho 5117: }
5118:
5119: /* Forward definitions. */
1.1.1.4 misho 5120: static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5121: static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
1.1 misho 5122:
/* Allocates a zero-filled backtrack record of "size" bytes with
   sljit_alloc_memory(), stores "ccstart" in its cc field and makes it the new
   parent->top, linking the previous top through its prev field. If the
   compiler reports an error after the allocation, the enclosing function
   returns "error". The expansion site must provide the variables "compiler",
   "backtrack" and "parent". */
1.1.1.3 misho 5123: #define PUSH_BACKTRACK(size, ccstart, error) \
1.1 misho 5124: do \
5125: { \
1.1.1.3 misho 5126: backtrack = sljit_alloc_memory(compiler, (size)); \
1.1 misho 5127: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5128: return error; \
1.1.1.3 misho 5129: memset(backtrack, 0, size); \
5130: backtrack->prev = parent->top; \
5131: backtrack->cc = (ccstart); \
5132: parent->top = backtrack; \
1.1 misho 5133: } \
5134: while (0)
5135:
/* Variant of the backtrack allocation macro for functions returning void:
   allocates a zero-filled record of "size" bytes, stores "ccstart" in its cc
   field and pushes it onto parent->top; on a compiler error the enclosing
   function returns without a value. The expansion site must provide the
   variables "compiler", "backtrack" and "parent". */
1.1.1.3 misho 5136: #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
1.1 misho 5137: do \
5138: { \
1.1.1.3 misho 5139: backtrack = sljit_alloc_memory(compiler, (size)); \
1.1 misho 5140: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5141: return; \
1.1.1.3 misho 5142: memset(backtrack, 0, size); \
5143: backtrack->prev = parent->top; \
5144: backtrack->cc = (ccstart); \
5145: parent->top = backtrack; \
1.1 misho 5146: } \
5147: while (0)
5148:
/* Casts the local variable "backtrack" to a pointer to the given record type. */
1.1.1.3 misho 5149: #define BACKTRACK_AS(type) ((type *)backtrack)
1.1 misho 5150:
/* Emits code that selects one capture group among those listed for an
   OP_DNREF / OP_DNREFI item.
   common      the shared compiler state (name table, jscript_compat flag)
   cc          pattern pointer to the OP_DNREF / OP_DNREFI opcode; the first
               2-byte operand is the index of the first name table entry, the
               second is the number of entries
   backtracks  failure list, or NULL to emit no failure check
   Each entry's OVECTOR slot is compared with OVECTOR(1); the first entry
   whose slot differs is selected. If none differs, the last entry stays
   selected and, when backtracks is not NULL and jscript_compat is off, the
   generated code backtracks. */
1.1.1.5 ! misho 5151: static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
1.1 misho 5152: {
1.1.1.5 ! misho 5153: /* The OVECTOR offset goes to TMP2. */
1.1 misho 5154: DEFINE_COMPILER;
1.1.1.5 ! misho 5155: int count = GET2(cc, 1 + IMM2_SIZE);
! 5156: pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
! 5157: unsigned int offset;
! 5158: jump_list *found = NULL;
! 5159:
! 5160: SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
! 5161:
/* TMP1 = OVECTOR(1), the reference value (presumably what an unset group's
   start slot holds - confirm against the OVECTOR initialisation). */
! 5162: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
! 5163:
/* All entries except the last: leave the search as soon as the slot differs
   from TMP1, with TMP2 already set up for that entry by GET_LOCAL_BASE. */
! 5164: count--;
! 5165: while (count-- > 0)
! 5166: {
! 5167: offset = GET2(slot, 0) << 1;
! 5168: GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
! 5169: add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
! 5170: slot += common->name_entry_size;
! 5171: }
! 5172:
/* Last entry: it is selected unconditionally; only the optional failure
   check is emitted for it. */
! 5173: offset = GET2(slot, 0) << 1;
! 5174: GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
! 5175: if (backtracks != NULL && !common->jscript_compat)
! 5176: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
! 5177:
! 5178: set_jumps(found, LABEL());
! 5179: }
! 5180:
! 5181: static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
! 5182: {
! 5183: DEFINE_COMPILER;
! 5184: BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
! 5185: int offset = 0;
1.1 misho 5186: struct sljit_jump *jump = NULL;
1.1.1.3 misho 5187: struct sljit_jump *partial;
5188: struct sljit_jump *nopartial;
1.1 misho 5189:
1.1.1.5 ! misho 5190: if (ref)
! 5191: {
! 5192: offset = GET2(cc, 1) << 1;
! 5193: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
! 5194: /* OVECTOR(1) contains the "string begin - 1" constant. */
! 5195: if (withchecks && !common->jscript_compat)
! 5196: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
! 5197: }
! 5198: else
! 5199: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
1.1 misho 5200:
1.1.1.2 misho 5201: #if defined SUPPORT_UTF && defined SUPPORT_UCP
5202: if (common->utf && *cc == OP_REFI)
1.1 misho 5203: {
1.1.1.4 misho 5204: SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
1.1.1.5 ! misho 5205: if (ref)
! 5206: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
! 5207: else
! 5208: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
! 5209:
1.1 misho 5210: if (withchecks)
5211: jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5212:
5213: /* Needed to save important temporary registers. */
5214: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
1.1.1.4 misho 5215: OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5216: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
1.1.1.2 misho 5217: sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
1.1 misho 5218: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1.1.1.3 misho 5219: if (common->mode == JIT_COMPILE)
5220: add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5221: else
5222: {
5223: add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5224: nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5225: check_partial(common, FALSE);
5226: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5227: JUMPHERE(nopartial);
5228: }
1.1 misho 5229: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5230: }
5231: else
1.1.1.2 misho 5232: #endif /* SUPPORT_UTF && SUPPORT_UCP */
1.1 misho 5233: {
1.1.1.5 ! misho 5234: if (ref)
! 5235: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
! 5236: else
! 5237: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
! 5238:
1.1 misho 5239: if (withchecks)
5240: jump = JUMP(SLJIT_C_ZERO);
1.1.1.3 misho 5241:
1.1 misho 5242: OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1.1.1.3 misho 5243: partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5244: if (common->mode == JIT_COMPILE)
5245: add_jump(compiler, backtracks, partial);
1.1 misho 5246:
5247: add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
1.1.1.3 misho 5248: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5249:
5250: if (common->mode != JIT_COMPILE)
5251: {
5252: nopartial = JUMP(SLJIT_JUMP);
5253: JUMPHERE(partial);
5254: /* TMP2 -= STR_END - STR_PTR */
5255: OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5256: OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5257: partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5258: OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5259: add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5260: add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5261: JUMPHERE(partial);
5262: check_partial(common, FALSE);
5263: add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5264: JUMPHERE(nopartial);
5265: }
1.1 misho 5266: }
5267:
5268: if (jump != NULL)
5269: {
5270: if (emptyfail)
1.1.1.3 misho 5271: add_jump(compiler, backtracks, jump);
1.1 misho 5272: else
5273: JUMPHERE(jump);
5274: }
5275: }
5276:
1.1.1.4 misho 5277: static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
1.1 misho 5278: {
5279: DEFINE_COMPILER;
1.1.1.5 ! misho 5280: BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
1.1.1.3 misho 5281: backtrack_common *backtrack;
1.1.1.2 misho 5282: pcre_uchar type;
1.1.1.5 ! misho 5283: int offset = 0;
1.1 misho 5284: struct sljit_label *label;
5285: struct sljit_jump *zerolength;
5286: struct sljit_jump *jump = NULL;
1.1.1.2 misho 5287: pcre_uchar *ccbegin = cc;
1.1 misho 5288: int min = 0, max = 0;
5289: BOOL minimize;
5290:
1.1.1.3 misho 5291: PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
1.1 misho 5292:
1.1.1.5 ! misho 5293: if (ref)
! 5294: offset = GET2(cc, 1) << 1;
! 5295: else
! 5296: cc += IMM2_SIZE;
1.1.1.2 misho 5297: type = cc[1 + IMM2_SIZE];
1.1.1.5 ! misho 5298:
! 5299: SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
1.1 misho 5300: minimize = (type & 0x1) != 0;
5301: switch(type)
5302: {
5303: case OP_CRSTAR:
5304: case OP_CRMINSTAR:
5305: min = 0;
5306: max = 0;
1.1.1.2 misho 5307: cc += 1 + IMM2_SIZE + 1;
1.1 misho 5308: break;
5309: case OP_CRPLUS:
5310: case OP_CRMINPLUS:
5311: min = 1;
5312: max = 0;
1.1.1.2 misho 5313: cc += 1 + IMM2_SIZE + 1;
1.1 misho 5314: break;
5315: case OP_CRQUERY:
5316: case OP_CRMINQUERY:
5317: min = 0;
5318: max = 1;
1.1.1.2 misho 5319: cc += 1 + IMM2_SIZE + 1;
1.1 misho 5320: break;
5321: case OP_CRRANGE:
5322: case OP_CRMINRANGE:
1.1.1.2 misho 5323: min = GET2(cc, 1 + IMM2_SIZE + 1);
5324: max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5325: cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
1.1 misho 5326: break;
5327: default:
5328: SLJIT_ASSERT_STOP();
5329: break;
5330: }
5331:
5332: if (!minimize)
5333: {
5334: if (min == 0)
5335: {
5336: allocate_stack(common, 2);
1.1.1.5 ! misho 5337: if (ref)
! 5338: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1.1 misho 5339: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5340: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5341: /* Temporary release of STR_PTR. */
1.1.1.4 misho 5342: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
1.1.1.5 ! misho 5343: /* Handles both invalid and empty cases. Since the minimum repeat,
! 5344: is zero the invalid case is basically the same as an empty case. */
! 5345: if (ref)
! 5346: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
! 5347: else
! 5348: {
! 5349: compile_dnref_search(common, ccbegin, NULL);
! 5350: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
! 5351: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
! 5352: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
! 5353: }
1.1 misho 5354: /* Restore if not zero length. */
1.1.1.4 misho 5355: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
1.1 misho 5356: }
5357: else
5358: {
5359: allocate_stack(common, 1);
1.1.1.5 ! misho 5360: if (ref)
! 5361: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1.1 misho 5362: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
1.1.1.5 ! misho 5363: if (ref)
! 5364: {
! 5365: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
! 5366: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
! 5367: }
! 5368: else
! 5369: {
! 5370: compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
! 5371: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
! 5372: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
! 5373: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
! 5374: }
1.1 misho 5375: }
5376:
5377: if (min > 1 || max > 1)
5378: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5379:
5380: label = LABEL();
1.1.1.5 ! misho 5381: if (!ref)
! 5382: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
1.1.1.4 misho 5383: compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
1.1 misho 5384:
5385: if (min > 1 || max > 1)
5386: {
5387: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5388: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5389: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5390: if (min > 1)
5391: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5392: if (max > 1)
5393: {
5394: jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5395: allocate_stack(common, 1);
5396: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5397: JUMPTO(SLJIT_JUMP, label);
5398: JUMPHERE(jump);
5399: }
5400: }
5401:
5402: if (max == 0)
5403: {
5404: /* Includes min > 1 case as well. */
5405: allocate_stack(common, 1);
5406: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5407: JUMPTO(SLJIT_JUMP, label);
5408: }
5409:
5410: JUMPHERE(zerolength);
1.1.1.4 misho 5411: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
1.1 misho 5412:
1.1.1.4 misho 5413: count_match(common);
1.1 misho 5414: return cc;
5415: }
5416:
1.1.1.5 ! misho 5417: allocate_stack(common, ref ? 2 : 3);
! 5418: if (ref)
! 5419: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1.1 misho 5420: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5421: if (type != OP_CRMINSTAR)
5422: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5423:
5424: if (min == 0)
5425: {
1.1.1.5 ! misho 5426: /* Handles both invalid and empty cases. Since the minimum repeat,
! 5427: is zero the invalid case is basically the same as an empty case. */
! 5428: if (ref)
! 5429: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
! 5430: else
! 5431: {
! 5432: compile_dnref_search(common, ccbegin, NULL);
! 5433: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
! 5434: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
! 5435: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
! 5436: }
! 5437: /* Length is non-zero, we can match real repeats. */
1.1 misho 5438: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5439: jump = JUMP(SLJIT_JUMP);
5440: }
5441: else
1.1.1.5 ! misho 5442: {
! 5443: if (ref)
! 5444: {
! 5445: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
! 5446: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
! 5447: }
! 5448: else
! 5449: {
! 5450: compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
! 5451: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
! 5452: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
! 5453: zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
! 5454: }
! 5455: }
1.1 misho 5456:
1.1.1.4 misho 5457: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
1.1 misho 5458: if (max > 0)
1.1.1.3 misho 5459: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
1.1 misho 5460:
1.1.1.5 ! misho 5461: if (!ref)
! 5462: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
1.1.1.4 misho 5463: compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
1.1 misho 5464: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5465:
5466: if (min > 1)
5467: {
5468: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5469: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5470: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
1.1.1.4 misho 5471: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
1.1 misho 5472: }
5473: else if (max > 0)
5474: OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5475:
5476: if (jump != NULL)
5477: JUMPHERE(jump);
5478: JUMPHERE(zerolength);
5479:
1.1.1.4 misho 5480: count_match(common);
1.1 misho 5481: return cc;
5482: }
5483:
1.1.1.4 misho 5484: static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
1.1 misho 5485: {
5486: DEFINE_COMPILER;
1.1.1.3 misho 5487: backtrack_common *backtrack;
1.1 misho 5488: recurse_entry *entry = common->entries;
5489: recurse_entry *prev = NULL;
1.1.1.4 misho 5490: sljit_sw start = GET(cc, 1);
5491: pcre_uchar *start_cc;
5492: BOOL needs_control_head;
1.1 misho 5493:
1.1.1.3 misho 5494: PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
1.1.1.4 misho 5495:
5496: /* Inlining simple patterns. */
5497: if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5498: {
5499: start_cc = common->start + start;
5500: compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5501: BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5502: return cc + 1 + LINK_SIZE;
5503: }
5504:
1.1 misho 5505: while (entry != NULL)
5506: {
5507: if (entry->start == start)
5508: break;
5509: prev = entry;
5510: entry = entry->next;
5511: }
5512:
5513: if (entry == NULL)
5514: {
5515: entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5516: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5517: return NULL;
5518: entry->next = NULL;
5519: entry->entry = NULL;
5520: entry->calls = NULL;
5521: entry->start = start;
5522:
5523: if (prev != NULL)
5524: prev->next = entry;
5525: else
5526: common->entries = entry;
5527: }
5528:
1.1.1.3 misho 5529: if (common->has_set_som && common->mark_ptr != 0)
5530: {
5531: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5532: allocate_stack(common, 2);
5533: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5534: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5535: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5536: }
5537: else if (common->has_set_som || common->mark_ptr != 0)
5538: {
5539: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5540: allocate_stack(common, 1);
5541: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5542: }
1.1 misho 5543:
5544: if (entry->entry == NULL)
5545: add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5546: else
5547: JUMPTO(SLJIT_FAST_CALL, entry->entry);
5548: /* Leave if the match is failed. */
1.1.1.3 misho 5549: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
1.1 misho 5550: return cc + 1 + LINK_SIZE;
5551: }
5552:
1.1.1.4 misho 5553: static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5554: {
5555: const pcre_uchar *begin = arguments->begin;
5556: int *offset_vector = arguments->offsets;
5557: int offset_count = arguments->offset_count;
5558: int i;
5559:
5560: if (PUBL(callout) == NULL)
5561: return 0;
5562:
5563: callout_block->version = 2;
5564: callout_block->callout_data = arguments->callout_data;
5565:
5566: /* Offsets in subject. */
5567: callout_block->subject_length = arguments->end - arguments->begin;
5568: callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5569: callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5570: #if defined COMPILE_PCRE8
5571: callout_block->subject = (PCRE_SPTR)begin;
5572: #elif defined COMPILE_PCRE16
5573: callout_block->subject = (PCRE_SPTR16)begin;
5574: #elif defined COMPILE_PCRE32
5575: callout_block->subject = (PCRE_SPTR32)begin;
5576: #endif
5577:
5578: /* Convert and copy the JIT offset vector to the offset_vector array. */
5579: callout_block->capture_top = 0;
5580: callout_block->offset_vector = offset_vector;
5581: for (i = 2; i < offset_count; i += 2)
5582: {
5583: offset_vector[i] = jit_ovector[i] - begin;
5584: offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5585: if (jit_ovector[i] >= begin)
5586: callout_block->capture_top = i;
5587: }
5588:
5589: callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5590: if (offset_count > 0)
5591: offset_vector[0] = -1;
5592: if (offset_count > 1)
5593: offset_vector[1] = -1;
5594: return (*PUBL(callout))(callout_block);
5595: }
5596:
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of a callout block member, expressed relative to the end of the
(aligned) block, hence always negative. */
#define CALLOUT_ARG_OFFSET(arg) (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5603:
5604: static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5605: {
5606: DEFINE_COMPILER;
5607: backtrack_common *backtrack;
5608:
5609: PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5610:
5611: allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5612:
5613: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5614: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5615: SLJIT_ASSERT(common->capture_last_ptr != 0);
5616: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5617: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5618:
5619: /* These pointer sized fields temporarly stores internal variables. */
5620: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5621: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5622: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5623:
5624: if (common->mark_ptr != 0)
5625: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5626: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5627: OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5628: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5629:
5630: /* Needed to save important temporary registers. */
5631: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5632: OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5633: GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5634: sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5635: OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5636: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5637: free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5638:
5639: /* Check return value. */
5640: OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5641: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5642: if (common->forced_quit_label == NULL)
5643: add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5644: else
5645: JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5646: return cc + 2 + 2 * LINK_SIZE;
5647: }
5648:
/* The callout block helper macros are not needed beyond this point. */
#undef CALLOUT_ARG_OFFSET
#undef CALLOUT_ARG_SIZE
5651:
5652: static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
1.1 misho 5653: {
5654: DEFINE_COMPILER;
5655: int framesize;
1.1.1.4 misho 5656: int extrasize;
5657: BOOL needs_control_head;
5658: int private_data_ptr;
1.1.1.3 misho 5659: backtrack_common altbacktrack;
1.1.1.2 misho 5660: pcre_uchar *ccbegin;
5661: pcre_uchar opcode;
5662: pcre_uchar bra = OP_BRA;
1.1 misho 5663: jump_list *tmp = NULL;
1.1.1.3 misho 5664: jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
1.1 misho 5665: jump_list **found;
5666: /* Saving previous accept variables. */
1.1.1.4 misho 5667: BOOL save_local_exit = common->local_exit;
5668: BOOL save_positive_assert = common->positive_assert;
5669: then_trap_backtrack *save_then_trap = common->then_trap;
5670: struct sljit_label *save_quit_label = common->quit_label;
5671: struct sljit_label *save_accept_label = common->accept_label;
5672: jump_list *save_quit = common->quit;
5673: jump_list *save_positive_assert_quit = common->positive_assert_quit;
1.1.1.3 misho 5674: jump_list *save_accept = common->accept;
1.1 misho 5675: struct sljit_jump *jump;
5676: struct sljit_jump *brajump = NULL;
5677:
1.1.1.4 misho 5678: /* Assert captures then. */
5679: common->then_trap = NULL;
5680:
1.1 misho 5681: if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5682: {
5683: SLJIT_ASSERT(!conditional);
5684: bra = *cc;
5685: cc++;
5686: }
1.1.1.4 misho 5687: private_data_ptr = PRIVATE_DATA(cc);
5688: SLJIT_ASSERT(private_data_ptr != 0);
5689: framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
1.1.1.3 misho 5690: backtrack->framesize = framesize;
1.1.1.4 misho 5691: backtrack->private_data_ptr = private_data_ptr;
1.1 misho 5692: opcode = *cc;
5693: SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5694: found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5695: ccbegin = cc;
5696: cc += GET(cc, 1);
5697:
5698: if (bra == OP_BRAMINZERO)
5699: {
1.1.1.3 misho 5700: /* This is a braminzero backtrack path. */
1.1 misho 5701: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5702: free_stack(common, 1);
5703: brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5704: }
5705:
5706: if (framesize < 0)
5707: {
1.1.1.4 misho 5708: extrasize = needs_control_head ? 2 : 1;
5709: if (framesize == no_frame)
5710: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5711: allocate_stack(common, extrasize);
5712: if (needs_control_head)
5713: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
1.1 misho 5714: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
1.1.1.4 misho 5715: if (needs_control_head)
5716: {
5717: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5718: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5719: }
1.1 misho 5720: }
5721: else
5722: {
1.1.1.4 misho 5723: extrasize = needs_control_head ? 3 : 2;
5724: allocate_stack(common, framesize + extrasize);
5725: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5726: OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5727: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5728: if (needs_control_head)
5729: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
1.1 misho 5730: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
1.1.1.4 misho 5731: if (needs_control_head)
5732: {
5733: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5734: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5735: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5736: }
5737: else
5738: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5739: init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
1.1 misho 5740: }
5741:
1.1.1.3 misho 5742: memset(&altbacktrack, 0, sizeof(backtrack_common));
1.1.1.4 misho 5743: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5744: {
5745: /* Negative assert is stronger than positive assert. */
5746: common->local_exit = TRUE;
5747: common->quit_label = NULL;
5748: common->quit = NULL;
5749: common->positive_assert = FALSE;
5750: }
5751: else
5752: common->positive_assert = TRUE;
5753: common->positive_assert_quit = NULL;
5754:
1.1 misho 5755: while (1)
5756: {
1.1.1.4 misho 5757: common->accept_label = NULL;
1.1 misho 5758: common->accept = NULL;
1.1.1.3 misho 5759: altbacktrack.top = NULL;
5760: altbacktrack.topbacktracks = NULL;
1.1 misho 5761:
5762: if (*ccbegin == OP_ALT)
5763: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5764:
1.1.1.3 misho 5765: altbacktrack.cc = ccbegin;
1.1.1.4 misho 5766: compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
1.1 misho 5767: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5768: {
1.1.1.4 misho 5769: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5770: {
5771: common->local_exit = save_local_exit;
5772: common->quit_label = save_quit_label;
5773: common->quit = save_quit;
5774: }
5775: common->positive_assert = save_positive_assert;
5776: common->then_trap = save_then_trap;
5777: common->accept_label = save_accept_label;
5778: common->positive_assert_quit = save_positive_assert_quit;
1.1 misho 5779: common->accept = save_accept;
5780: return NULL;
5781: }
1.1.1.4 misho 5782: common->accept_label = LABEL();
1.1 misho 5783: if (common->accept != NULL)
1.1.1.4 misho 5784: set_jumps(common->accept, common->accept_label);
1.1 misho 5785:
5786: /* Reset stack. */
5787: if (framesize < 0)
1.1.1.4 misho 5788: {
5789: if (framesize == no_frame)
5790: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5791: else
5792: free_stack(common, extrasize);
5793: if (needs_control_head)
5794: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5795: }
5796: else
5797: {
1.1 misho 5798: if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5799: {
1.1.1.4 misho 5800: /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5801: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5802: if (needs_control_head)
5803: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
1.1 misho 5804: }
5805: else
5806: {
1.1.1.4 misho 5807: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5808: if (needs_control_head)
5809: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
1.1 misho 5810: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5811: }
1.1.1.4 misho 5812: }
1.1 misho 5813:
5814: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5815: {
5816: /* We know that STR_PTR was stored on the top of the stack. */
5817: if (conditional)
1.1.1.4 misho 5818: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
1.1 misho 5819: else if (bra == OP_BRAZERO)
5820: {
5821: if (framesize < 0)
1.1.1.4 misho 5822: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
1.1 misho 5823: else
5824: {
1.1.1.4 misho 5825: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5826: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5827: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
1.1 misho 5828: }
1.1.1.4 misho 5829: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
1.1 misho 5830: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5831: }
5832: else if (framesize >= 0)
5833: {
5834: /* For OP_BRA and OP_BRAMINZERO. */
1.1.1.4 misho 5835: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
1.1 misho 5836: }
5837: }
5838: add_jump(compiler, found, JUMP(SLJIT_JUMP));
5839:
1.1.1.4 misho 5840: compile_backtrackingpath(common, altbacktrack.top);
1.1 misho 5841: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5842: {
1.1.1.4 misho 5843: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5844: {
5845: common->local_exit = save_local_exit;
5846: common->quit_label = save_quit_label;
5847: common->quit = save_quit;
5848: }
5849: common->positive_assert = save_positive_assert;
5850: common->then_trap = save_then_trap;
5851: common->accept_label = save_accept_label;
5852: common->positive_assert_quit = save_positive_assert_quit;
1.1 misho 5853: common->accept = save_accept;
5854: return NULL;
5855: }
1.1.1.3 misho 5856: set_jumps(altbacktrack.topbacktracks, LABEL());
1.1 misho 5857:
5858: if (*cc != OP_ALT)
5859: break;
5860:
5861: ccbegin = cc;
5862: cc += GET(cc, 1);
5863: }
1.1.1.4 misho 5864:
5865: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5866: {
5867: SLJIT_ASSERT(common->positive_assert_quit == NULL);
5868: /* Makes the check less complicated below. */
5869: common->positive_assert_quit = common->quit;
5870: }
5871:
1.1 misho 5872: /* None of them matched. */
1.1.1.4 misho 5873: if (common->positive_assert_quit != NULL)
5874: {
5875: jump = JUMP(SLJIT_JUMP);
5876: set_jumps(common->positive_assert_quit, LABEL());
5877: SLJIT_ASSERT(framesize != no_stack);
5878: if (framesize < 0)
5879: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5880: else
5881: {
5882: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5883: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5884: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5885: }
5886: JUMPHERE(jump);
5887: }
5888:
5889: if (needs_control_head)
5890: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
1.1 misho 5891:
5892: if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5893: {
5894: /* Assert is failed. */
5895: if (conditional || bra == OP_BRAZERO)
5896: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5897:
5898: if (framesize < 0)
5899: {
5900: /* The topmost item should be 0. */
5901: if (bra == OP_BRAZERO)
1.1.1.4 misho 5902: {
5903: if (extrasize == 2)
5904: free_stack(common, 1);
1.1 misho 5905: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
1.1.1.4 misho 5906: }
1.1 misho 5907: else
1.1.1.4 misho 5908: free_stack(common, extrasize);
1.1 misho 5909: }
5910: else
5911: {
1.1.1.4 misho 5912: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
1.1 misho 5913: /* The topmost item should be 0. */
5914: if (bra == OP_BRAZERO)
5915: {
1.1.1.4 misho 5916: free_stack(common, framesize + extrasize - 1);
1.1 misho 5917: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5918: }
5919: else
1.1.1.4 misho 5920: free_stack(common, framesize + extrasize);
5921: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
1.1 misho 5922: }
5923: jump = JUMP(SLJIT_JUMP);
5924: if (bra != OP_BRAZERO)
5925: add_jump(compiler, target, jump);
5926:
5927: /* Assert is successful. */
5928: set_jumps(tmp, LABEL());
5929: if (framesize < 0)
5930: {
5931: /* We know that STR_PTR was stored on the top of the stack. */
1.1.1.4 misho 5932: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
1.1 misho 5933: /* Keep the STR_PTR on the top of the stack. */
5934: if (bra == OP_BRAZERO)
1.1.1.4 misho 5935: {
5936: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5937: if (extrasize == 2)
5938: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5939: }
1.1 misho 5940: else if (bra == OP_BRAMINZERO)
5941: {
1.1.1.4 misho 5942: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
1.1 misho 5943: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5944: }
5945: }
5946: else
5947: {
5948: if (bra == OP_BRA)
5949: {
1.1.1.4 misho 5950: /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5951: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5952: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
1.1 misho 5953: }
5954: else
5955: {
1.1.1.4 misho 5956: /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5957: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5958: if (extrasize == 2)
5959: {
5960: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5961: if (bra == OP_BRAMINZERO)
5962: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5963: }
5964: else
5965: {
5966: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5967: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5968: }
1.1 misho 5969: }
5970: }
5971:
5972: if (bra == OP_BRAZERO)
5973: {
1.1.1.4 misho 5974: backtrack->matchingpath = LABEL();
5975: SET_LABEL(jump, backtrack->matchingpath);
1.1 misho 5976: }
5977: else if (bra == OP_BRAMINZERO)
5978: {
1.1.1.4 misho 5979: JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
1.1 misho 5980: JUMPHERE(brajump);
5981: if (framesize >= 0)
5982: {
1.1.1.4 misho 5983: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 5984: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 5985: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
1.1 misho 5986: }
1.1.1.3 misho 5987: set_jumps(backtrack->common.topbacktracks, LABEL());
1.1 misho 5988: }
5989: }
5990: else
5991: {
5992: /* AssertNot is successful. */
5993: if (framesize < 0)
5994: {
5995: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5996: if (bra != OP_BRA)
1.1.1.4 misho 5997: {
5998: if (extrasize == 2)
5999: free_stack(common, 1);
1.1 misho 6000: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
1.1.1.4 misho 6001: }
1.1 misho 6002: else
1.1.1.4 misho 6003: free_stack(common, extrasize);
1.1 misho 6004: }
6005: else
6006: {
6007: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
1.1.1.4 misho 6008: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
1.1 misho 6009: /* The topmost item should be 0. */
6010: if (bra != OP_BRA)
6011: {
1.1.1.4 misho 6012: free_stack(common, framesize + extrasize - 1);
1.1 misho 6013: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6014: }
6015: else
1.1.1.4 misho 6016: free_stack(common, framesize + extrasize);
6017: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
1.1 misho 6018: }
6019:
6020: if (bra == OP_BRAZERO)
1.1.1.4 misho 6021: backtrack->matchingpath = LABEL();
1.1 misho 6022: else if (bra == OP_BRAMINZERO)
6023: {
1.1.1.4 misho 6024: JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
1.1 misho 6025: JUMPHERE(brajump);
6026: }
6027:
6028: if (bra != OP_BRA)
6029: {
1.1.1.3 misho 6030: SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6031: set_jumps(backtrack->common.topbacktracks, LABEL());
6032: backtrack->common.topbacktracks = NULL;
1.1 misho 6033: }
6034: }
6035:
1.1.1.4 misho 6036: if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6037: {
6038: common->local_exit = save_local_exit;
6039: common->quit_label = save_quit_label;
6040: common->quit = save_quit;
6041: }
6042: common->positive_assert = save_positive_assert;
6043: common->then_trap = save_then_trap;
6044: common->accept_label = save_accept_label;
6045: common->positive_assert_quit = save_positive_assert_quit;
1.1 misho 6046: common->accept = save_accept;
6047: return cc + 1 + LINK_SIZE;
6048: }
6049:
/*
Emits the code that runs after the body of an OP_ONCE group has matched
(compile_bracket_matchingpath calls it when opcode == OP_ONCE).

Arguments:
  common             - compiler state
  ket                - closing opcode of the group (compared against OP_KET,
                       OP_KETRMAX and OP_KETRMIN below)
  framesize          - frame size of the group; it is negative when the caller
                       did not initialise a frame with init_frame()
  private_data_ptr   - offset (relative to SLJIT_LOCALS_REG) of the private
                       data slot of the group
  has_alternatives   - TRUE when the caller pushed the extra item used for
                       alternatives
  needs_control_head - TRUE when a saved value must be read back from the
                       stack and written to common->control_head_ptr

Register usage of the emitted code: TMP1 carries the control head value,
TMP2 is loaded for OP_KETRMAX and is consumed by the caller afterwards.
*/
1.1.1.4 misho 6050: static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6051: {
6052: DEFINE_COMPILER;
6053: int stacksize;
6054:
6055: if (framesize < 0)
6056: {
/* no_frame: STACK_TOP is reloaded from the private data slot. Otherwise the
   items counted in stacksize (control head and/or the extra item) are freed. */
6057: if (framesize == no_frame)
6058: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6059: else
6060: {
6061: stacksize = needs_control_head ? 1 : 0;
6062: if (ket != OP_KET || has_alternatives)
6063: stacksize++;
6064: free_stack(common, stacksize);
6065: }
6066:
/* The control head is read from the second word when the extra item is
   present, otherwise from the first word at STACK_TOP. */
6067: if (needs_control_head)
6068: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6069:
6070: /* TMP2 which is set here used by OP_KETRMAX below. */
6071: if (ket == OP_KETRMAX)
6072: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6073: else if (ket == OP_KETRMIN)
6074: {
6075: /* Move the STR_PTR to the private_data_ptr. */
6076: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6077: }
6078: }
6079: else
6080: {
/* A frame exists: STACK_TOP is recomputed from the private data slot plus
   the frame and the one or two extra words counted in stacksize. */
6081: stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6082: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6083: if (needs_control_head)
6084: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6085:
6086: if (ket == OP_KETRMAX)
6087: {
6088: /* TMP2 which is set here used by OP_KETRMAX below. */
6089: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6090: }
6091: }
/* Both branches above left the control head value in TMP1. */
6092: if (needs_control_head)
6093: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6094: }
6095:
/*
Emits the code that saves the previous capture state on the stack and
records the new capture when a capturing bracket has matched.

Arguments:
  common           - compiler state
  stacksize        - index of the first stack slot (as used by STACK()) that
                     this function may write; the slots must already be
                     allocated by the caller
  offset           - twice the capture number (the code uses offset >> 1 as
                     the bracket index and offset / offset + 1 as OVECTOR
                     indexes)
  private_data_ptr - offset (relative to SLJIT_LOCALS_REG) of the slot whose
                     value becomes the new OVECTOR(offset)

Returns stacksize advanced by the number of slots written: one when
common->capture_last_ptr is in use, plus two when the bracket is not an
optimized one.

The emitted code uses TMP1 and TMP2 as scratch registers.
*/
6096: static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6097: {
6098: DEFINE_COMPILER;
6099:
/* Save the previous "last capture" value and replace it with this bracket's number. */
6100: if (common->capture_last_ptr != 0)
6101: {
6102: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6103: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6104: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6105: stacksize++;
6106: }
/* Push the old OVECTOR pair, then store the new one: the value held in the
   private data slot becomes OVECTOR(offset), STR_PTR becomes OVECTOR(offset + 1). */
6107: if (common->optimized_cbracket[offset >> 1] == 0)
6108: {
6109: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6110: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6111: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6112: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6113: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6114: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6115: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6116: stacksize += 2;
6117: }
6118: return stacksize;
6119: }
6120:
1.1 misho 6121: /*
6122: Handling bracketed expressions is probably the most complex part.
6123:
6124: Stack layout naming characters:
6125: S - Push the current STR_PTR
6126: 0 - Push a 0 (NULL)
6127: A - Push the current STR_PTR. Needed for restoring the STR_PTR
6128: before the next alternative. Not pushed if there are no alternatives.
6129: M - Any values pushed by the current alternative. Can be empty, or anything.
6130: C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6131: L - Push the previous local (pointed by localptr) to the stack
6132: () - optional values stored on the stack
6133: ()* - optional, can be stored multiple times
6134:
6135: The following list shows the regular expression templates, their PCRE byte codes
6136: and stack layout supported by pcre-sljit.
6137:
6138: (?:) OP_BRA | OP_KET A M
6139: () OP_CBRA | OP_KET C M
6140: (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6141: OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6142: (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6143: OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6144: ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6145: OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6146: ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6147: OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6148: (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6149: (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6150: ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6151: ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6152: (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6153: OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6154: (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6155: OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6156: ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6157: OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6158: ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6159: OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6160:
6161:
6162: Stack layout naming characters:
6163: A - Push the alternative index (starting from 0) on the stack.
6164: Not pushed if there are no alternatives.
6165: M - Any values pushed by the current alternative. Can be empty, or anything.
6166:
6167: The next list shows the possible content of a bracket:
6168: (|) OP_*BRA | OP_ALT ... M A
6169: (?()|) OP_*COND | OP_ALT M A
6170: (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6171: (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6172: Or nothing, if trace is unnecessary
6173: */
6174:
/*
Generates the matching path of a bracketed group: the bracket opcode at cc,
optionally preceded by OP_BRAZERO or OP_BRAMINZERO. The stack layouts used
for each bracket / ket combination are listed in the comment that precedes
this function.

Arguments:
  common - compiler state
  cc     - points to the (optional zero prefix and) bracket opcode
  parent - backtrack node of the enclosing construct; the new
           bracket_backtrack is linked to it by PUSH_BACKTRACK

The code is emitted in this order:
  1. decode the zero prefix, the bracket opcode, the closing ket and, when
     the ket carries private data, the repeat_* values;
  2. push the items needed before the first alternative and handle the
     OP_BRAMINZERO entry;
  3. save the per-bracket private data (once frame, capture start or
     starting STR_PTR);
  4. for conditional groups, evaluate the condition (at compile time for
     OP_RREF / OP_DNRREF, with emitted code otherwise);
  5. compile the first alternative with compile_matchingpath();
  6. push the items used by the backtracking path and emit the loop of
     repeated groups.

Returns the address that follows the whole group (including repeat_length
extra units), or NULL when the sljit compiler reports an error.
*/
1.1.1.4 misho 6175: static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
1.1 misho 6176: {
6177: DEFINE_COMPILER;
1.1.1.3 misho 6178: backtrack_common *backtrack;
1.1.1.2 misho 6179: pcre_uchar opcode;
1.1.1.4 misho 6180: int private_data_ptr = 0;
1.1 misho 6181: int offset = 0;
1.1.1.5 ! misho 6182: int i, stacksize;
1.1.1.4 misho 6183: int repeat_ptr = 0, repeat_length = 0;
6184: int repeat_type = 0, repeat_count = 0;
1.1.1.2 misho 6185: pcre_uchar *ccbegin;
1.1.1.4 misho 6186: pcre_uchar *matchingpath;
1.1.1.5 ! misho 6187: pcre_uchar *slot;
1.1.1.2 misho 6188: pcre_uchar bra = OP_BRA;
6189: pcre_uchar ket;
1.1.1.3 misho 6190: assert_backtrack *assert;
1.1 misho 6191: BOOL has_alternatives;
1.1.1.4 misho 6192: BOOL needs_control_head = FALSE;
1.1 misho 6193: struct sljit_jump *jump;
6194: struct sljit_jump *skip;
1.1.1.4 misho 6195: struct sljit_label *rmax_label = NULL;
6196: struct sljit_jump *braminzero = NULL;
1.1 misho 6197:
1.1.1.3 misho 6198: PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
1.1 misho 6199:
/* bra keeps the optional zero prefix; it stays OP_BRA when there is none. */
6200: if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6201: {
6202: bra = *cc;
6203: cc++;
6204: opcode = *cc;
6205: }
6206:
6207: opcode = *cc;
6208: ccbegin = cc;
1.1.1.4 misho 6209: matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6210: ket = *matchingpath;
/* A plain OP_KET with non-zero private data carries repeat information
   (pointer, length, type and count). OP_UPTO and OP_MINUPTO repeats are
   handled below as if the ket were OP_KETRMAX / OP_KETRMIN. */
6211: if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6212: {
6213: repeat_ptr = PRIVATE_DATA(matchingpath);
6214: repeat_length = PRIVATE_DATA(matchingpath + 1);
6215: repeat_type = PRIVATE_DATA(matchingpath + 2);
6216: repeat_count = PRIVATE_DATA(matchingpath + 3);
6217: SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6218: if (repeat_type == OP_UPTO)
6219: ket = OP_KETRMAX;
6220: if (repeat_type == OP_MINUPTO)
6221: ket = OP_KETRMIN;
6222: }
1.1 misho 6223:
/* A conditional group whose condition is OP_DEF emits no code at all: the
   whole group is skipped. */
6224: if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6225: {
1.1.1.3 misho 6226: /* Drop this bracket_backtrack. */
6227: parent->top = backtrack->prev;
1.1.1.4 misho 6228: return matchingpath + 1 + LINK_SIZE + repeat_length;
1.1 misho 6229: }
6230:
1.1.1.4 misho 6231: matchingpath = ccbegin + 1 + LINK_SIZE;
1.1 misho 6232: SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6233: SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* cc now points to the end of the first alternative (OP_ALT or the ket). */
6234: cc += GET(cc, 1);
6235:
6236: has_alternatives = *cc == OP_ALT;
/* For conditional groups the condition type decides instead: FALSE for
   OP_RREF / OP_DNRREF, TRUE for every other condition. */
1.1.1.5 ! misho 6237: if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
! 6238: has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
1.1 misho 6239:
/* Normalise the opcode: a repeated OP_COND is treated as OP_SCOND and
   OP_ONCE_NC as OP_ONCE. */
6240: if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6241: opcode = OP_SCOND;
6242: if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6243: opcode = OP_ONCE;
6244:
6245: if (opcode == OP_CBRA || opcode == OP_SCBRA)
6246: {
6247: /* Capturing brackets has a pre-allocated space. */
6248: offset = GET2(ccbegin, 1 + LINK_SIZE);
/* From here on offset is twice the capture number. Optimized brackets use
   the OVECTOR entry itself as their private data slot. */
1.1.1.4 misho 6249: if (common->optimized_cbracket[offset] == 0)
6250: {
6251: private_data_ptr = OVECTOR_PRIV(offset);
6252: offset <<= 1;
6253: }
6254: else
6255: {
6256: offset <<= 1;
6257: private_data_ptr = OVECTOR(offset);
6258: }
6259: BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6260: matchingpath += IMM2_SIZE;
1.1 misho 6261: }
6262: else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6263: {
6264: /* Other brackets simply allocate the next entry. */
1.1.1.4 misho 6265: private_data_ptr = PRIVATE_DATA(ccbegin);
6266: SLJIT_ASSERT(private_data_ptr != 0);
6267: BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
1.1 misho 6268: if (opcode == OP_ONCE)
1.1.1.4 misho 6269: BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
1.1 misho 6270: }
6271:
6272: /* Instructions before the first alternative. */
/* First pass counts the slots, second pass fills them: a 0 marker for
   repeated groups and the current STR_PTR for OP_BRAZERO. */
6273: stacksize = 0;
1.1.1.4 misho 6274: if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
1.1 misho 6275: stacksize++;
6276: if (bra == OP_BRAZERO)
6277: stacksize++;
6278:
6279: if (stacksize > 0)
6280: allocate_stack(common, stacksize);
6281:
6282: stacksize = 0;
1.1.1.4 misho 6283: if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
1.1 misho 6284: {
6285: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6286: stacksize++;
6287: }
6288:
6289: if (bra == OP_BRAZERO)
6290: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6291:
6292: if (bra == OP_BRAMINZERO)
6293: {
1.1.1.3 misho 6294: /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
1.1 misho 6295: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6296: if (ket != OP_KETRMIN)
6297: {
6298: free_stack(common, 1);
1.1.1.4 misho 6299: braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
1.1 misho 6300: }
6301: else
6302: {
6303: if (opcode == OP_ONCE || opcode >= OP_SBRA)
6304: {
6305: jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6306: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6307: /* Nothing stored during the first run. */
6308: skip = JUMP(SLJIT_JUMP);
6309: JUMPHERE(jump);
6310: /* Checking zero-length iteration. */
1.1.1.3 misho 6311: if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
1.1 misho 6312: {
1.1.1.4 misho 6313: /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6314: braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 6315: }
6316: else
6317: {
6318: /* Except when the whole stack frame must be saved. */
1.1.1.4 misho 6319: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6320: braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
1.1 misho 6321: }
6322: JUMPHERE(skip);
6323: }
6324: else
6325: {
6326: jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6327: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6328: JUMPHERE(jump);
6329: }
6330: }
6331: }
6332:
/* Counted repeats: initialise the counter; OP_EXACT loops back to rmax_label. */
1.1.1.4 misho 6333: if (repeat_type != 0)
6334: {
6335: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6336: if (repeat_type == OP_EXACT)
6337: rmax_label = LABEL();
6338: }
6339:
1.1 misho 6340: if (ket == OP_KETRMIN)
1.1.1.4 misho 6341: BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
1.1 misho 6342:
6343: if (ket == OP_KETRMAX)
6344: {
1.1.1.4 misho 6345: rmax_label = LABEL();
6346: if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6347: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
1.1 misho 6348: }
6349:
6350: /* Handling capturing brackets and alternatives. */
6351: if (opcode == OP_ONCE)
6352: {
/* TMP2 holds the current control head until it is stored on the stack. */
1.1.1.4 misho 6353: stacksize = 0;
6354: if (needs_control_head)
6355: {
6356: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6357: stacksize++;
6358: }
6359:
1.1.1.3 misho 6360: if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
1.1 misho 6361: {
1.1.1.4 misho 6362: /* Neither capturing brackets nor recursions are found in the block. */
1.1 misho 6363: if (ket == OP_KETRMIN)
6364: {
1.1.1.4 misho 6365: stacksize += 2;
6366: if (!needs_control_head)
6367: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 6368: }
1.1.1.4 misho 6369: else
1.1 misho 6370: {
1.1.1.4 misho 6371: if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6372: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6373: if (ket == OP_KETRMAX || has_alternatives)
6374: stacksize++;
1.1 misho 6375: }
1.1.1.4 misho 6376:
6377: if (stacksize > 0)
6378: allocate_stack(common, stacksize);
6379:
6380: stacksize = 0;
6381: if (needs_control_head)
6382: {
6383: stacksize++;
6384: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6385: }
6386:
6387: if (ket == OP_KETRMIN)
6388: {
6389: if (needs_control_head)
6390: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6391: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6392: if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6393: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6394: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6395: }
6396: else if (ket == OP_KETRMAX || has_alternatives)
6397: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
1.1 misho 6398: }
6399: else
6400: {
/* A frame is needed: allocate the control head / STR_PTR slots together
   with framesize + 1 words, then let init_frame() fill the frame. */
1.1.1.4 misho 6401: if (ket != OP_KET || has_alternatives)
6402: stacksize++;
6403:
6404: stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6405: allocate_stack(common, stacksize);
6406:
6407: if (needs_control_head)
6408: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6409:
/* TMP1 = previous private data value, TMP2 = new value for the slot. */
6410: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6411: OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6412:
6413: stacksize = needs_control_head ? 1 : 0;
6414: if (ket != OP_KET || has_alternatives)
1.1 misho 6415: {
1.1.1.4 misho 6416: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6417: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6418: stacksize++;
6419: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
1.1 misho 6420: }
6421: else
6422: {
1.1.1.4 misho 6423: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6424: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
1.1 misho 6425: }
1.1.1.4 misho 6426: init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
1.1 misho 6427: }
6428: }
6429: else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6430: {
6431: /* Saving the previous values. */
1.1.1.4 misho 6432: if (common->optimized_cbracket[offset >> 1] != 0)
6433: {
6434: SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6435: allocate_stack(common, 2);
6436: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6437: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6438: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6439: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6440: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6441: }
6442: else
6443: {
6444: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6445: allocate_stack(common, 1);
6446: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6447: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6448: }
1.1 misho 6449: }
6450: else if (opcode == OP_SBRA || opcode == OP_SCOND)
6451: {
6452: /* Saving the previous value. */
1.1.1.4 misho 6453: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 6454: allocate_stack(common, 1);
1.1.1.4 misho 6455: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
1.1 misho 6456: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6457: }
6458: else if (has_alternatives)
6459: {
6460: /* Pushing the starting string pointer. */
6461: allocate_stack(common, 1);
6462: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6463: }
6464:
6465: /* Generating code for the first alternative. */
6466: if (opcode == OP_COND || opcode == OP_SCOND)
6467: {
1.1.1.4 misho 6468: if (*matchingpath == OP_CREF)
1.1 misho 6469: {
/* The condition fails when the referenced OVECTOR entry equals OVECTOR(1). */
6470: SLJIT_ASSERT(has_alternatives);
1.1.1.3 misho 6471: add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
1.1.1.4 misho 6472: CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6473: matchingpath += 1 + IMM2_SIZE;
1.1 misho 6474: }
1.1.1.5 ! misho 6475: else if (*matchingpath == OP_DNCREF)
1.1 misho 6476: {
6477: SLJIT_ASSERT(has_alternatives);
6478:
/* i name-table entries starting at slot are checked. The differences
   (OVECTOR entry - OVECTOR(1)) are OR-ed into TMP2, so the result is zero,
   and the condition fails, only when every entry equals OVECTOR(1).
   STR_PTR is used as a scratch register and is preserved in TMP3. */
1.1.1.5 ! misho 6479: i = GET2(matchingpath, 1 + IMM2_SIZE);
! 6480: slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
! 6481: OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
! 6482: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
! 6483: OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
! 6484: slot += common->name_entry_size;
! 6485: i--;
! 6486: while (i-- > 0)
! 6487: {
! 6488: OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
! 6489: OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
! 6490: slot += common->name_entry_size;
! 6491: }
! 6492: OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
! 6493: add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
! 6494: matchingpath += 1 + 2 * IMM2_SIZE;
1.1 misho 6495: }
1.1.1.5 ! misho 6496: else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
1.1 misho 6497: {
6498: /* Never has other case. */
1.1.1.3 misho 6499: BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
1.1.1.5 ! misho 6500: SLJIT_ASSERT(!has_alternatives);
1.1 misho 6501:
/* These conditions are decided while compiling. stacksize is reused as the
   result: non-zero selects the first branch, zero the "else" branch. */
1.1.1.5 ! misho 6502: if (*matchingpath == OP_RREF)
1.1 misho 6503: {
1.1.1.5 ! misho 6504: stacksize = GET2(matchingpath, 1);
! 6505: if (common->currententry == NULL)
! 6506: stacksize = 0;
! 6507: else if (stacksize == RREF_ANY)
! 6508: stacksize = 1;
! 6509: else if (common->currententry->start == 0)
! 6510: stacksize = stacksize == 0;
! 6511: else
! 6512: stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
! 6513:
1.1 misho 6514: if (stacksize != 0)
1.1.1.4 misho 6515: matchingpath += 1 + IMM2_SIZE;
1.1.1.5 ! misho 6516: }
! 6517: else
! 6518: {
! 6519: if (common->currententry == NULL || common->currententry->start == 0)
! 6520: stacksize = 0;
1.1 misho 6521: else
6522: {
/* Search the name-table entries for the number read from the current
   entry's start; stacksize reaches 0 when none of them matches. */
1.1.1.5 ! misho 6523: stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
! 6524: slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
! 6525: i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
! 6526: while (stacksize > 0)
! 6527: {
! 6528: if ((int)GET2(slot, 0) == i)
! 6529: break;
! 6530: slot += common->name_entry_size;
! 6531: stacksize--;
! 6532: }
! 6533: }
! 6534:
! 6535: if (stacksize != 0)
! 6536: matchingpath += 1 + 2 * IMM2_SIZE;
! 6537: }
! 6538:
! 6539: /* The stacksize == 0 is a common "else" case. */
! 6540: if (stacksize == 0)
! 6541: {
1.1 misho 6542: if (*cc == OP_ALT)
6543: {
1.1.1.4 misho 6544: matchingpath = cc + 1 + LINK_SIZE;
1.1 misho 6545: cc += GET(cc, 1);
6546: }
6547: else
1.1.1.4 misho 6548: matchingpath = cc;
1.1 misho 6549: }
6550: }
6551: else
6552: {
/* The condition is an assertion: it is compiled with its own
   assert_backtrack, stored in u.assert. */
1.1.1.4 misho 6553: SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
1.1.1.3 misho 6554: /* Similar code as PUSH_BACKTRACK macro. */
6555: assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
1.1 misho 6556: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6557: return NULL;
1.1.1.3 misho 6558: memset(assert, 0, sizeof(assert_backtrack));
1.1.1.4 misho 6559: assert->common.cc = matchingpath;
1.1.1.3 misho 6560: BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
1.1.1.4 misho 6561: matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
1.1 misho 6562: }
6563: }
6564:
/* Compile the selected alternative: the byte code from matchingpath up to cc. */
1.1.1.4 misho 6565: compile_matchingpath(common, matchingpath, cc, backtrack);
1.1 misho 6566: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6567: return NULL;
6568:
6569: if (opcode == OP_ONCE)
1.1.1.4 misho 6570: match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
1.1 misho 6571:
/* Items pushed after the alternative matched. As before, the first pass
   counts the slots and the second pass fills them in the same order. */
6572: stacksize = 0;
1.1.1.4 misho 6573: if (repeat_type == OP_MINUPTO)
6574: {
6575: /* We need to preserve the counter. TMP2 will be used below. */
6576: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6577: stacksize++;
6578: }
1.1 misho 6579: if (ket != OP_KET || bra != OP_BRA)
6580: stacksize++;
1.1.1.4 misho 6581: if (offset != 0)
6582: {
6583: if (common->capture_last_ptr != 0)
6584: stacksize++;
6585: if (common->optimized_cbracket[offset >> 1] == 0)
6586: stacksize += 2;
6587: }
1.1 misho 6588: if (has_alternatives && opcode != OP_ONCE)
6589: stacksize++;
6590:
6591: if (stacksize > 0)
6592: allocate_stack(common, stacksize);
6593:
6594: stacksize = 0;
1.1.1.4 misho 6595: if (repeat_type == OP_MINUPTO)
1.1 misho 6596: {
1.1.1.4 misho 6597: /* TMP2 was set above. */
6598: OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
1.1 misho 6599: stacksize++;
6600: }
1.1.1.4 misho 6601:
6602: if (ket != OP_KET || bra != OP_BRA)
1.1 misho 6603: {
1.1.1.4 misho 6604: if (ket != OP_KET)
6605: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6606: else
6607: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
1.1 misho 6608: stacksize++;
6609: }
6610:
1.1.1.4 misho 6611: if (offset != 0)
6612: stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6613:
1.1 misho 6614: if (has_alternatives)
6615: {
/* The pushed 0 is the index of the alternative that matched (the first one). */
6616: if (opcode != OP_ONCE)
6617: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6618: if (ket != OP_KETRMAX)
1.1.1.4 misho 6619: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
1.1 misho 6620: }
6621:
1.1.1.4 misho 6622: /* Must be after the matchingpath label. */
6623: if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
1.1 misho 6624: {
1.1.1.4 misho 6625: SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
1.1 misho 6626: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6627: }
6628:
6629: if (ket == OP_KETRMAX)
6630: {
/* Greedy repeat: jump back to rmax_label, either while the counter is
   non-zero, while the iteration consumed input, or unconditionally. */
1.1.1.4 misho 6631: if (repeat_type != 0)
6632: {
6633: if (has_alternatives)
6634: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6635: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6636: JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6637: /* Drop STR_PTR for greedy plus quantifier. */
6638: if (opcode != OP_ONCE)
6639: free_stack(common, 1);
6640: }
6641: else if (opcode == OP_ONCE || opcode >= OP_SBRA)
1.1 misho 6642: {
6643: if (has_alternatives)
1.1.1.4 misho 6644: BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
1.1 misho 6645: /* Checking zero-length iteration. */
6646: if (opcode != OP_ONCE)
1.1.1.3 misho 6647: {
1.1.1.4 misho 6648: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
1.1.1.3 misho 6649: /* Drop STR_PTR for greedy plus quantifier. */
6650: if (bra != OP_BRAZERO)
6651: free_stack(common, 1);
6652: }
1.1 misho 6653: else
6654: /* TMP2 must contain the starting STR_PTR. */
1.1.1.4 misho 6655: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
1.1 misho 6656: }
6657: else
1.1.1.4 misho 6658: JUMPTO(SLJIT_JUMP, rmax_label);
6659: BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6660: }
6661:
6662: if (repeat_type == OP_EXACT)
6663: {
6664: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6665: JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6666: }
6667: else if (repeat_type == OP_UPTO)
6668: {
6669: /* We need to preserve the counter. */
6670: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6671: allocate_stack(common, 1);
6672: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
1.1 misho 6673: }
6674:
6675: if (bra == OP_BRAZERO)
1.1.1.4 misho 6676: BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
1.1 misho 6677:
6678: if (bra == OP_BRAMINZERO)
6679: {
1.1.1.3 misho 6680: /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
1.1.1.4 misho 6681: JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6682: if (braminzero != NULL)
1.1 misho 6683: {
1.1.1.4 misho 6684: JUMPHERE(braminzero);
1.1 misho 6685: /* We need to release the end pointer to perform the
1.1.1.3 misho 6686: backtrack for the zero-length iteration. When
1.1 misho 6687: framesize is < 0, OP_ONCE will do the release itself. */
1.1.1.3 misho 6688: if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
1.1 misho 6689: {
1.1.1.4 misho 6690: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 6691: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6692: }
6693: else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6694: free_stack(common, 1);
6695: }
1.1.1.3 misho 6696: /* Continue to the normal backtrack. */
1.1 misho 6697: }
6698:
6699: if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
1.1.1.4 misho 6700: count_match(common);
1.1 misho 6701:
6702: /* Skip the other alternatives. */
6703: while (*cc == OP_ALT)
6704: cc += GET(cc, 1);
6705: cc += 1 + LINK_SIZE;
1.1.1.4 misho 6706:
6707: /* Temporarily encoding the needs_control_head in framesize. */
6708: if (opcode == OP_ONCE)
6709: BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6710: return cc + repeat_length;
1.1 misho 6711: }
6712:
/* Emits the matching path for a bracket whose opcode is OP_BRAPOS, OP_SBRAPOS,
   OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO.

   common  the shared compiler state
   cc      points at the (optional) OP_BRAPOSZERO or at the bracket opcode
   parent  the enclosing backtrack (not referenced by name in the statements
           below; presumably consumed by PUSH_BACKTRACK - TODO confirm)

   Every alternative is compiled (matching path followed by its backtracking
   path) inside a loop that ends when OP_KETRPOS is reached.

   Returns the code position after the closing OP_KETRPOS (cc + 1 + LINK_SIZE),
   or NULL when sljit_get_compiler_error() reports a failure. */
1.1.1.4 misho 6713: static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
1.1 misho 6714: {
6715: DEFINE_COMPILER;
1.1.1.3 misho 6716: backtrack_common *backtrack;
1.1.1.2 misho 6717: pcre_uchar opcode;
1.1.1.4 misho 6718: int private_data_ptr;
1.1 misho 6719: int cbraprivptr = 0;
1.1.1.4 misho 6720: BOOL needs_control_head;
1.1 misho 6721: int framesize;
6722: int stacksize;
6723: int offset = 0;
6724: BOOL zero = FALSE;
1.1.1.2 misho 6725: pcre_uchar *ccbegin = NULL;
1.1.1.4 misho 6726: int stack; /* Also contains the offset of control head. */
1.1 misho 6727: struct sljit_label *loop = NULL;
6728: struct jump_list *emptymatch = NULL;
6729:
1.1.1.3 misho 6730: PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO is skipped and remembered in "zero". */
1.1 misho 6731: if (*cc == OP_BRAPOSZERO)
6732: {
6733: zero = TRUE;
6734: cc++;
6735: }
6736:
6737: opcode = *cc;
1.1.1.4 misho 6738: private_data_ptr = PRIVATE_DATA(cc);
6739: SLJIT_ASSERT(private_data_ptr != 0);
6740: BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first alternative (ccbegin). For the capturing variants also
   fetch the group number; "offset" then becomes twice that number and stays
   0 for the non-capturing variants. */
1.1 misho 6741: switch(opcode)
6742: {
6743: case OP_BRAPOS:
6744: case OP_SBRAPOS:
6745: ccbegin = cc + 1 + LINK_SIZE;
6746: break;
6747:
6748: case OP_CBRAPOS:
6749: case OP_SCBRAPOS:
6750: offset = GET2(cc, 1 + LINK_SIZE);
1.1.1.4 misho 6751: /* This case cannot be optimized in the same way as
6752: normal capturing brackets. */
6753: SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
1.1 misho 6754: cbraprivptr = OVECTOR_PRIV(offset);
6755: offset <<= 1;
1.1.1.2 misho 6756: ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
1.1 misho 6757: break;
6758:
6759: default:
6760: SLJIT_ASSERT_STOP();
6761: break;
6762: }
6763:
1.1.1.4 misho 6764: framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
1.1.1.3 misho 6765: BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
1.1 misho 6766: if (framesize < 0)
6767: {
/* Negative framesize: reserve two slots for the saved OVECTOR pair (three
   when capture_last_ptr is in use) or one slot for STR_PTR, plus one slot
   for the control head and one for the "!zero" flag when they are needed. */
1.1.1.4 misho 6768: if (offset != 0)
6769: {
6770: stacksize = 2;
6771: if (common->capture_last_ptr != 0)
6772: stacksize++;
6773: }
6774: else
6775: stacksize = 1;
6776:
6777: if (needs_control_head)
6778: stacksize++;
1.1 misho 6779: if (!zero)
6780: stacksize++;
1.1.1.4 misho 6781:
1.1.1.3 misho 6782: BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
1.1 misho 6783: allocate_stack(common, stacksize);
1.1.1.4 misho 6784: if (framesize == no_frame)
6785: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
1.1 misho 6786:
1.1.1.4 misho 6787: stack = 0;
6788: if (offset != 0)
1.1 misho 6789: {
1.1.1.4 misho 6790: stack = 2;
1.1 misho 6791: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6792: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6793: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
1.1.1.4 misho 6794: if (common->capture_last_ptr != 0)
6795: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1.1 misho 6796: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
1.1.1.4 misho 6797: if (needs_control_head)
6798: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6799: if (common->capture_last_ptr != 0)
6800: {
6801: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6802: stack = 3;
6803: }
1.1 misho 6804: }
6805: else
1.1.1.4 misho 6806: {
6807: if (needs_control_head)
6808: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
1.1 misho 6809: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
1.1.1.4 misho 6810: stack = 1;
6811: }
1.1 misho 6812:
/* When both are present, the "!zero" flag is written one slot above the
   control head; "stack" is then stepped back so that it ends up holding
   the slot index of the saved control head. */
1.1.1.4 misho 6813: if (needs_control_head)
6814: stack++;
1.1 misho 6815: if (!zero)
1.1.1.4 misho 6816: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6817: if (needs_control_head)
6818: {
6819: stack--;
6820: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6821: }
1.1 misho 6822: }
6823: else
6824: {
/* Non-negative framesize: reserve framesize + 1 slots plus the optional
   "!zero" flag, control head and (for non-capturing brackets) STR_PTR. */
6825: stacksize = framesize + 1;
6826: if (!zero)
6827: stacksize++;
1.1.1.4 misho 6828: if (needs_control_head)
6829: stacksize++;
6830: if (offset == 0)
1.1 misho 6831: stacksize++;
1.1.1.3 misho 6832: BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
1.1.1.4 misho 6833:
1.1 misho 6834: allocate_stack(common, stacksize);
1.1.1.4 misho 6835: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6836: if (needs_control_head)
6837: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6838: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
1.1 misho 6839:
6840: stack = 0;
6841: if (!zero)
6842: {
6843: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
1.1.1.4 misho 6844: stack = 1;
6845: }
6846: if (needs_control_head)
6847: {
6848: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
1.1 misho 6849: stack++;
6850: }
1.1.1.4 misho 6851: if (offset == 0)
1.1 misho 6852: {
6853: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6854: stack++;
6855: }
/* TMP1 still holds the previous content of the private data slot. */
6856: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
1.1.1.4 misho 6857: init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
/* Step "stack" back to the slot where the control head was written; the
   value is only read below when needs_control_head is set. */
6858: stack -= 1 + (offset == 0);
1.1 misho 6859: }
6860:
1.1.1.4 misho 6861: if (offset != 0)
1.1 misho 6862: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6863:
/* Every successful alternative jumps back to this label. */
6864: loop = LABEL();
6865: while (*cc != OP_KETRPOS)
6866: {
1.1.1.3 misho 6867: backtrack->top = NULL;
6868: backtrack->topbacktracks = NULL;
/* Advance cc to the end of the current alternative (ccbegin .. cc). */
1.1 misho 6869: cc += GET(cc, 1);
6870:
1.1.1.4 misho 6871: compile_matchingpath(common, ccbegin, cc, backtrack);
1.1 misho 6872: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6873: return NULL;
6874:
6875: if (framesize < 0)
6876: {
1.1.1.4 misho 6877: if (framesize == no_frame)
6878: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 6879:
1.1.1.4 misho 6880: if (offset != 0)
1.1 misho 6881: {
/* Publish the capture: the start comes from cbraprivptr, the end is the
   current STR_PTR, and cbraprivptr is reset to STR_PTR. */
6882: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6883: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6884: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
1.1.1.4 misho 6885: if (common->capture_last_ptr != 0)
6886: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
1.1 misho 6887: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6888: }
6889: else
6890: {
6891: if (opcode == OP_SBRAPOS)
6892: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6893: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6894: }
6895:
/* For the OP_S* variants TMP1 holds the position saved before this
   iteration; leave the loop when STR_PTR has not moved. */
6896: if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6897: add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6898:
/* Clear the flag that was initialised to 1 before the loop. */
6899: if (!zero)
6900: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6901: }
6902: else
6903: {
1.1.1.4 misho 6904: if (offset != 0)
1.1 misho 6905: {
1.1.1.4 misho 6906: OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
1.1 misho 6907: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6908: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6909: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
1.1.1.4 misho 6910: if (common->capture_last_ptr != 0)
6911: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
1.1 misho 6912: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6913: }
6914: else
6915: {
1.1.1.4 misho 6916: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6917: OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
1.1 misho 6918: if (opcode == OP_SBRAPOS)
1.1.1.4 misho 6919: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6920: OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
1.1 misho 6921: }
6922:
6923: if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6924: add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6925:
6926: if (!zero)
6927: {
/* NOTE(review): this is the else branch of the outer "framesize < 0" test
   and framesize is not modified in between, so only the STACK(0) store
   below can be reached here. */
6928: if (framesize < 0)
6929: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6930: else
6931: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6932: }
6933: }
1.1.1.4 misho 6934:
/* Reload the control head from the slot recorded in "stack". */
6935: if (needs_control_head)
6936: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
6937:
1.1 misho 6938: JUMPTO(SLJIT_JUMP, loop);
6939: flush_stubs(common);
6940:
/* The backtracking path of this alternative is emitted right after its
   matching path; the collected topbacktracks jumps land after it. */
1.1.1.4 misho 6941: compile_backtrackingpath(common, backtrack->top);
1.1 misho 6942: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6943: return NULL;
1.1.1.3 misho 6944: set_jumps(backtrack->topbacktracks, LABEL());
1.1 misho 6945:
/* The alternative failed: reload STR_PTR from where it was saved. */
6946: if (framesize < 0)
6947: {
1.1.1.4 misho 6948: if (offset != 0)
1.1 misho 6949: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6950: else
6951: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6952: }
6953: else
6954: {
1.1.1.4 misho 6955: if (offset != 0)
1.1 misho 6956: {
6957: /* Last alternative. */
6958: if (*cc == OP_KETRPOS)
1.1.1.4 misho 6959: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 6960: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6961: }
6962: else
6963: {
1.1.1.4 misho 6964: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6965: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
1.1 misho 6966: }
6967: }
6968:
6969: if (*cc == OP_KETRPOS)
6970: break;
/* Continue with the next alternative, which starts after the OP_ALT item. */
6971: ccbegin = cc + 1 + LINK_SIZE;
6972: }
6973:
1.1.1.4 misho 6974: /* We don't have to restore the control head in case of a failed match. */
6975:
1.1.1.3 misho 6976: backtrack->topbacktracks = NULL;
/* Without OP_BRAPOSZERO, a flag that is still non-zero means that no
   iteration completed, so the jump is added to topbacktracks. */
1.1 misho 6977: if (!zero)
6978: {
6979: if (framesize < 0)
1.1.1.3 misho 6980: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
1.1.1.4 misho 6981: else /* TMP2 is set to [private_data_ptr] above. */
6982: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
1.1 misho 6983: }
6984:
6985: /* None of them matched. */
6986: set_jumps(emptymatch, LABEL());
1.1.1.4 misho 6987: count_match(common);
1.1 misho 6988: return cc + 1 + LINK_SIZE;
6989: }
6990:
/* Decodes the repeat item that starts at cc.

   *opcode  receives the repeat kind, normalised onto the OP_STAR..OP_POSUPTO
            range. A class range repeat stays OP_CRRANGE / OP_CRMINRANGE /
            OP_CRPOSRANGE unless its minimum is 0 (then OP_UPTO / OP_MINUPTO /
            OP_POSUPTO) or its maximum equals its minimum (then OP_EXACT).
   *type    receives the kind of the repeated item: OP_CHAR, OP_CHARI, OP_NOT,
            OP_NOTI, the class opcode itself, or the type code read from the
            pattern for the OP_TYPE* repeats.
   *max     is written for OP_UPTO / OP_MINUPTO / OP_EXACT / OP_POSUPTO and for
            class range repeats; otherwise it is left untouched.
   *min     is written only for class range repeats.
   *end     when end is not NULL, receives the position after the whole item.

   Returns the position of the item's data: just after the class opcode for
   classes, just after the type code for OP_TYPE* repeats, otherwise the
   position following the repeat opcode and its optional count. */
1.1.1.5 ! misho 6991: static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
1.1 misho 6992: {
6993: int class_len;
6994:
6995: *opcode = *cc;
/* Each opcode family is shifted down onto the OP_STAR family. */
6996: if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6997: {
6998: cc++;
6999: *type = OP_CHAR;
7000: }
7001: else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7002: {
7003: cc++;
7004: *type = OP_CHARI;
7005: *opcode -= OP_STARI - OP_STAR;
7006: }
7007: else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7008: {
7009: cc++;
7010: *type = OP_NOT;
7011: *opcode -= OP_NOTSTAR - OP_STAR;
7012: }
7013: else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7014: {
7015: cc++;
7016: *type = OP_NOTI;
7017: *opcode -= OP_NOTSTARI - OP_STAR;
7018: }
7019: else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7020: {
7021: cc++;
7022: *opcode -= OP_TYPESTAR - OP_STAR;
/* 0 marks "type still to be read from the pattern" (see below). */
7023: *type = 0;
7024: }
7025: else
7026: {
/* Character class followed by a repeat opcode. */
1.1.1.5 ! misho 7027: SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
1.1 misho 7028: *type = *opcode;
7029: cc++;
/* Fixed length for OP_CLASS / OP_NCLASS; for OP_XCLASS the length is read
   from the pattern with GET(). */
1.1.1.2 misho 7030: class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
1.1 misho 7031: *opcode = cc[class_len - 1];
7032: if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7033: {
7034: *opcode -= OP_CRSTAR - OP_STAR;
7035: if (end != NULL)
7036: *end = cc + class_len;
7037: }
1.1.1.5 ! misho 7038: else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
! 7039: {
! 7040: *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
! 7041: if (end != NULL)
! 7042: *end = cc + class_len;
! 7043: }
1.1 misho 7044: else
7045: {
/* Range repeat: the minimum is stored first, the maximum follows it. */
1.1.1.5 ! misho 7046: SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
! 7047: *max = GET2(cc, (class_len + IMM2_SIZE));
! 7048: *min = GET2(cc, class_len);
1.1 misho 7049:
1.1.1.5 ! misho 7050: if (*min == 0)
1.1 misho 7051: {
1.1.1.5 ! misho 7052: SLJIT_ASSERT(*max != 0);
! 7053: *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
1.1 misho 7054: }
1.1.1.5 ! misho 7055: if (*max == *min)
1.1 misho 7056: *opcode = OP_EXACT;
7057:
7058: if (end != NULL)
1.1.1.2 misho 7059: *end = cc + class_len + 2 * IMM2_SIZE;
1.1 misho 7060: }
7061: return cc;
7062: }
7063:
/* Counted repeats carry their count right after the opcode. */
7064: if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7065: {
1.1.1.5 ! misho 7066: *max = GET2(cc, 0);
1.1.1.2 misho 7067: cc += IMM2_SIZE;
1.1 misho 7068: }
7069:
/* OP_TYPE* repeat: the type code is the next unit of the pattern. */
7070: if (*type == 0)
7071: {
7072: *type = *cc;
7073: if (end != NULL)
7074: *end = next_opcode(common, cc);
7075: cc++;
7076: return cc;
7077: }
7078:
/* Character repeat: the item ends after the character, including any extra
   units reported by GET_EXTRALEN when common->utf is set. */
7079: if (end != NULL)
7080: {
7081: *end = cc + 1;
1.1.1.2 misho 7082: #ifdef SUPPORT_UTF
7083: if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
1.1 misho 7084: #endif
7085: }
7086: return cc;
7087: }
7088:
/* Emits the matching path for a repeated single item (character, type or
   character class).

   common  the shared compiler state
   cc      points at the repeat opcode (or at the class opcode)
   parent  the enclosing backtrack (not referenced by name in the statements
           below; presumably consumed by PUSH_BACKTRACK - TODO confirm)

   The item is decoded with get_iterator_parameters() and the code emitted
   depends on the normalised repeat opcode. Returns the "end" position
   reported by get_iterator_parameters(). */
1.1.1.4 misho 7089: static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
1.1 misho 7090: {
7091: DEFINE_COMPILER;
1.1.1.3 misho 7092: backtrack_common *backtrack;
1.1.1.2 misho 7093: pcre_uchar opcode;
7094: pcre_uchar type;
1.1.1.5 ! misho 7095: int max = -1, min = -1;
1.1.1.2 misho 7096: pcre_uchar* end;
1.1 misho 7097: jump_list *nomatch = NULL;
7098: struct sljit_jump *jump = NULL;
7099: struct sljit_label *label;
/* With no private data slot (private_data_ptr == 0) the two working values
   live in STACK(0) / STACK(1) relative to STACK_TOP; otherwise they live at
   private_data_ptr and the following word, relative to SLJIT_LOCALS_REG. */
1.1.1.4 misho 7100: int private_data_ptr = PRIVATE_DATA(cc);
7101: int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
7102: int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7103: int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7104: int tmp_base, tmp_offset;
1.1 misho 7105:
1.1.1.3 misho 7106: PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
1.1 misho 7107:
1.1.1.5 ! misho 7108: cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
1.1 misho 7109:
/* Select the temporary used by OP_EXACT and the OP_POS* cases below: TMP3
   for the listed types, the POSSESSIVE0 local for OP_EXTUNI, OP_XCLASS,
   OP_NOTPROP and OP_PROP. */
1.1.1.4 misho 7110: switch(type)
7111: {
7112: case OP_NOT_DIGIT:
7113: case OP_DIGIT:
7114: case OP_NOT_WHITESPACE:
7115: case OP_WHITESPACE:
7116: case OP_NOT_WORDCHAR:
7117: case OP_WORDCHAR:
7118: case OP_ANY:
7119: case OP_ALLANY:
7120: case OP_ANYBYTE:
7121: case OP_ANYNL:
7122: case OP_NOT_HSPACE:
7123: case OP_HSPACE:
7124: case OP_NOT_VSPACE:
7125: case OP_VSPACE:
7126: case OP_CHAR:
7127: case OP_CHARI:
7128: case OP_NOT:
7129: case OP_NOTI:
7130: case OP_CLASS:
7131: case OP_NCLASS:
7132: tmp_base = TMP3;
7133: tmp_offset = 0;
7134: break;
7135:
7136: default:
7137: SLJIT_ASSERT_STOP();
7138: /* Fall through. */
7139:
7140: case OP_EXTUNI:
7141: case OP_XCLASS:
7142: case OP_NOTPROP:
7143: case OP_PROP:
7144: tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
7145: tmp_offset = POSSESSIVE0;
7146: break;
7147: }
7148:
1.1 misho 7149: switch(opcode)
7150: {
7151: case OP_STAR:
7152: case OP_PLUS:
7153: case OP_UPTO:
7154: case OP_CRRANGE:
7155: if (type == OP_ANYNL || type == OP_EXTUNI)
7156: {
/* For these two types every matched position is pushed on the stack as a
   separate entry, and a bounded repeat counts in POSSESSIVE0. */
1.1.1.4 misho 7157: SLJIT_ASSERT(private_data_ptr == 0);
1.1 misho 7158: if (opcode == OP_STAR || opcode == OP_UPTO)
7159: {
7160: allocate_stack(common, 2);
7161: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7162: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7163: }
7164: else
7165: {
7166: allocate_stack(common, 1);
7167: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7168: }
1.1.1.4 misho 7169:
1.1 misho 7170: if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7171: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
7172:
7173: label = LABEL();
1.1.1.4 misho 7174: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
1.1 misho 7175: if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7176: {
7177: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
7178: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* Below the minimum: loop again without pushing a position. */
1.1.1.5 ! misho 7179: if (opcode == OP_CRRANGE && min > 0)
! 7180: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
/* Maximum reached: leave the loop through "jump". */
! 7181: if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
! 7182: jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
1.1 misho 7183: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
7184: }
7185:
1.1.1.4 misho 7186: /* We cannot use TMP3 because of this allocate_stack. */
1.1 misho 7187: allocate_stack(common, 1);
7188: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7189: JUMPTO(SLJIT_JUMP, label);
7190: if (jump != NULL)
7191: JUMPHERE(jump);
7192: }
7193: else
7194: {
/* offset0 holds the position after the most recent successful match.
   offset1 holds the starting position for OP_STAR / OP_PLUS, or a counter
   starting at 1 for the bounded repeats. */
1.1.1.3 misho 7195: if (opcode == OP_PLUS)
1.1.1.4 misho 7196: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7197: if (private_data_ptr == 0)
7198: allocate_stack(common, 2);
7199: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7200: if (opcode <= OP_PLUS)
7201: OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
7202: else
7203: OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
1.1 misho 7204: label = LABEL();
1.1.1.4 misho 7205: compile_char1_matchingpath(common, type, cc, &nomatch);
7206: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7207: if (opcode <= OP_PLUS)
7208: JUMPTO(SLJIT_JUMP, label);
/* max == 0 is handled as "no upper limit": count, but never stop. */
1.1.1.5 ! misho 7209: else if (opcode == OP_CRRANGE && max == 0)
1.1 misho 7210: {
1.1.1.4 misho 7211: OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
1.1 misho 7212: JUMPTO(SLJIT_JUMP, label);
7213: }
7214: else
7215: {
1.1.1.4 misho 7216: OP1(SLJIT_MOV, TMP1, 0, base, offset1);
1.1 misho 7217: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1.1.1.4 misho 7218: OP1(SLJIT_MOV, base, offset1, TMP1, 0);
1.1.1.5 ! misho 7219: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
1.1 misho 7220: }
7221: set_jumps(nomatch, LABEL());
/* The counter started at 1, so fewer than min matches means counter < min + 1. */
1.1.1.3 misho 7222: if (opcode == OP_CRRANGE)
1.1.1.5 ! misho 7223: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
1.1.1.4 misho 7224: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
1.1 misho 7225: }
1.1.1.4 misho 7226: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
1.1 misho 7227: break;
7228:
7229: case OP_MINSTAR:
7230: case OP_MINPLUS:
/* Only the mandatory first match of OP_MINPLUS is emitted here; the current
   position is saved in offset0. */
1.1.1.3 misho 7231: if (opcode == OP_MINPLUS)
1.1.1.4 misho 7232: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7233: if (private_data_ptr == 0)
7234: allocate_stack(common, 1);
7235: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7236: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
1.1 misho 7237: break;
7238:
7239: case OP_MINUPTO:
7240: case OP_CRMINRANGE:
/* Save the position and start the counter at 1; OP_CRMINRANGE then jumps
   straight to the backtrack list. */
1.1.1.4 misho 7241: if (private_data_ptr == 0)
7242: allocate_stack(common, 2);
7243: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7244: OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
1.1 misho 7245: if (opcode == OP_CRMINRANGE)
1.1.1.3 misho 7246: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
1.1.1.4 misho 7247: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
1.1 misho 7248: break;
7249:
7250: case OP_QUERY:
7251: case OP_MINQUERY:
/* Save the position; only OP_QUERY attempts the match here. */
1.1.1.4 misho 7252: if (private_data_ptr == 0)
7253: allocate_stack(common, 1);
7254: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
1.1 misho 7255: if (opcode == OP_QUERY)
1.1.1.4 misho 7256: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7257: BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
1.1 misho 7258: break;
7259:
7260: case OP_EXACT:
/* Counted loop: the temporary starts at max and is decremented after each
   match until it reaches zero. */
1.1.1.5 ! misho 7261: OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
1.1 misho 7262: label = LABEL();
1.1.1.4 misho 7263: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7264: OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
7265: JUMPTO(SLJIT_C_NOT_ZERO, label);
1.1 misho 7266: break;
7267:
7268: case OP_POSSTAR:
7269: case OP_POSPLUS:
7270: case OP_POSUPTO:
/* The temporary keeps the position after the last successful match; when a
   match attempt fails, STR_PTR is reloaded from it. OP_POSUPTO additionally
   counts down from max in POSSESSIVE1. */
1.1.1.4 misho 7271: if (opcode == OP_POSPLUS)
7272: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7273: if (opcode == OP_POSUPTO)
1.1.1.5 ! misho 7274: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
1.1.1.4 misho 7275: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
1.1 misho 7276: label = LABEL();
1.1.1.4 misho 7277: compile_char1_matchingpath(common, type, cc, &nomatch);
7278: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
1.1 misho 7279: if (opcode != OP_POSUPTO)
7280: JUMPTO(SLJIT_JUMP, label);
7281: else
7282: {
1.1.1.4 misho 7283: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
7284: JUMPTO(SLJIT_C_NOT_ZERO, label);
1.1 misho 7285: }
7286: set_jumps(nomatch, LABEL());
1.1.1.4 misho 7287: OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
1.1 misho 7288: break;
7289:
7290: case OP_POSQUERY:
/* A single optional match: STR_PTR is reloaded from the temporary, which
   holds the new position on success and the old one on failure. */
1.1.1.4 misho 7291: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7292: compile_char1_matchingpath(common, type, cc, &nomatch);
7293: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
1.1 misho 7294: set_jumps(nomatch, LABEL());
1.1.1.4 misho 7295: OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
1.1 misho 7296: break;
7297:
1.1.1.5 ! misho 7298: case OP_CRPOSRANGE:
! 7299: /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
/* First part: exactly min mandatory matches. */
! 7300: OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
! 7301: label = LABEL();
! 7302: compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
! 7303: OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
! 7304: JUMPTO(SLJIT_C_NOT_ZERO, label);
! 7305:
/* Second part: up to max - min further matches, or an unbounded loop when
   max == 0. */
! 7306: if (max != 0)
! 7307: {
! 7308: SLJIT_ASSERT(max - min > 0);
! 7309: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
! 7310: }
! 7311: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
! 7312: label = LABEL();
! 7313: compile_char1_matchingpath(common, type, cc, &nomatch);
! 7314: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
! 7315: if (max == 0)
! 7316: JUMPTO(SLJIT_JUMP, label);
! 7317: else
! 7318: {
! 7319: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
! 7320: JUMPTO(SLJIT_C_NOT_ZERO, label);
! 7321: }
! 7322: set_jumps(nomatch, LABEL());
! 7323: OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
! 7324: break;
! 7325:
1.1 misho 7326: default:
7327: SLJIT_ASSERT_STOP();
7328: break;
7329: }
7330:
1.1.1.4 misho 7331: count_match(common);
1.1 misho 7332: return end;
7333: }
7334:
/* Emits the matching path for OP_FAIL, OP_ASSERT_ACCEPT and the remaining
   opcode routed here (presumably OP_ACCEPT - TODO confirm against the caller).

   Every jump to "accept" below goes directly to common->accept_label when that
   label is already set, and is otherwise queued on the common->accept list.
   Always returns cc + 1. */
1.1.1.4 misho 7335: static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
1.1 misho 7336: {
7337: DEFINE_COMPILER;
1.1.1.3 misho 7338: backtrack_common *backtrack;
1.1 misho 7339:
1.1.1.4 misho 7340: PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
1.1 misho 7341:
/* OP_FAIL: an unconditional jump on the backtrack list. */
7342: if (*cc == OP_FAIL)
7343: {
1.1.1.3 misho 7344: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
1.1 misho 7345: return cc + 1;
7346: }
7347:
7348: if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
7349: {
7350: /* No need to check notempty conditions. */
1.1.1.4 misho 7351: if (common->accept_label == NULL)
1.1 misho 7352: add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
7353: else
1.1.1.4 misho 7354: JUMPTO(SLJIT_JUMP, common->accept_label);
1.1 misho 7355: return cc + 1;
7356: }
7357:
/* Accept at once when STR_PTR differs from OVECTOR(0). */
1.1.1.4 misho 7358: if (common->accept_label == NULL)
1.1 misho 7359: add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
7360: else
1.1.1.4 misho 7361: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
/* STR_PTR equals OVECTOR(0): backtrack when the notempty field is non-zero. */
1.1 misho 7362: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7363: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
1.1.1.3 misho 7364: add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* Accept when the notempty_atstart field is zero. */
1.1 misho 7365: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
1.1.1.4 misho 7366: if (common->accept_label == NULL)
1.1 misho 7367: add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7368: else
1.1.1.4 misho 7369: CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
/* Accept when the str field of the arguments differs from STR_PTR;
   otherwise fall through to the final backtrack jump. */
1.1 misho 7370: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1.1.1.4 misho 7371: if (common->accept_label == NULL)
1.1 misho 7372: add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
7373: else
1.1.1.4 misho 7374: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
1.1.1.3 misho 7375: add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
1.1 misho 7376: return cc + 1;
7377: }
7378:
/* Emits the code that records a capture for the item at cc, whose group number
   is read with GET2(cc, 1). The end of the capture (OVECTOR(2 * n + 1)) is set
   to STR_PTR. Unless the bracket is flagged in common->optimized_cbracket, the
   start (OVECTOR(2 * n)) is copied from OVECTOR_PRIV(n). Nothing is emitted
   when common->currententry is not NULL. Always returns cc + 1 + IMM2_SIZE. */
1.1.1.4 misho 7379: static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
1.1 misho 7380: {
7381: DEFINE_COMPILER;
7382: int offset = GET2(cc, 1);
1.1.1.4 misho 7383: BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
1.1 misho 7384:
7385: /* Data will be discarded anyway... */
7386: if (common->currententry != NULL)
1.1.1.2 misho 7387: return cc + 1 + IMM2_SIZE;
1.1 misho 7388:
/* Read the private start value before offset is doubled into an OVECTOR index. */
1.1.1.4 misho 7389: if (!optimized_cbracket)
7390: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
1.1 misho 7391: offset <<= 1;
7392: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
1.1.1.4 misho 7393: if (!optimized_cbracket)
7394: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
1.1.1.2 misho 7395: return cc + 1 + IMM2_SIZE;
1.1 misho 7396: }
7397:
/* Emits the matching path for the control verb opcode at cc and returns the
   position after the item.

   OP_SKIP pushes STR_PTR on the stack. OP_PRUNE_ARG and OP_THEN_ARG store the
   address cc + 2 in both the common->mark_ptr local and the mark_ptr field of
   the jit arguments. For any other opcode, including OP_SKIP_ARG, no
   instruction is emitted by the statements below. */
1.1.1.4 misho 7398: static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7399: {
7400: DEFINE_COMPILER;
7401: backtrack_common *backtrack;
7402: pcre_uchar opcode = *cc;
7403: pcre_uchar *ccend = cc + 1;
7404:
/* The *_ARG forms occupy 2 + cc[1] additional units. */
7405: if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
7406: ccend += 2 + cc[1];
7407:
7408: PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7409:
7410: if (opcode == OP_SKIP)
7411: {
7412: allocate_stack(common, 1);
7413: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7414: return ccend;
7415: }
7416:
7417: if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
7418: {
7419: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7420: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
7421: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
7422: OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
7423: }
7424:
7425: return ccend;
7426: }
7427:
/* One-element opcode buffer holding OP_THEN_TRAP; compile_then_trap_matchingpath()
   points the common.cc field of its then_trap backtrack at this array. */
7428: static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
7429:
/* Pushes a then_trap backtrack for the code range cc .. ccend and emits the
   code that installs it as the new control head.

   The stack entry takes 3 + max(framesize, 0) slots. The three highest slots
   hold, from the top down: the offset of cc from common->start, the
   type_then_trap marker, and the previous control head. */
7430: static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
7431: {
7432: DEFINE_COMPILER;
7433: backtrack_common *backtrack;
/* Filled in by get_framesize() but not read afterwards in this function. */
7434: BOOL needs_control_head;
7435: int size;
7436:
7437: PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
7438: common->then_trap = BACKTRACK_AS(then_trap_backtrack);
7439: BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
7440: BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
7441: BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
7442:
7443: size = BACKTRACK_AS(then_trap_backtrack)->framesize;
7444: size = 3 + (size < 0 ? 0 : size);
7445:
/* Keep the old control head in TMP2 before the slot is overwritten. */
7446: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7447: allocate_stack(common, size);
/* The new control head is STACK_TOP, adjusted by the frame slots when a
   frame is present. */
7448: if (size > 3)
7449: OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
7450: else
7451: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
7452: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
7453: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
7454: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
7455:
/* From here on "size" is the raw framesize again. */
7456: size = BACKTRACK_AS(then_trap_backtrack)->framesize;
7457: if (size >= 0)
7458: init_frame(common, cc, ccend, size - 1, 0, FALSE);
7459: }
7460:
7461: static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
1.1 misho 7462: {
7463: DEFINE_COMPILER;
1.1.1.3 misho 7464: backtrack_common *backtrack;
1.1.1.4 misho 7465: BOOL has_then_trap = FALSE;
7466: then_trap_backtrack *save_then_trap = NULL;
7467:
7468: SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
7469:
7470: if (common->has_then && common->then_offsets[cc - common->start] != 0)
7471: {
7472: SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
7473: has_then_trap = TRUE;
7474: save_then_trap = common->then_trap;
7475: /* Tail item on backtrack. */
7476: compile_then_trap_matchingpath(common, cc, ccend, parent);
7477: }
1.1 misho 7478:
7479: while (cc < ccend)
7480: {
7481: switch(*cc)
7482: {
7483: case OP_SOD:
7484: case OP_SOM:
7485: case OP_NOT_WORD_BOUNDARY:
7486: case OP_WORD_BOUNDARY:
7487: case OP_NOT_DIGIT:
7488: case OP_DIGIT:
7489: case OP_NOT_WHITESPACE:
7490: case OP_WHITESPACE:
7491: case OP_NOT_WORDCHAR:
7492: case OP_WORDCHAR:
7493: case OP_ANY:
7494: case OP_ALLANY:
7495: case OP_ANYBYTE:
7496: case OP_NOTPROP:
7497: case OP_PROP:
7498: case OP_ANYNL:
7499: case OP_NOT_HSPACE:
7500: case OP_HSPACE:
7501: case OP_NOT_VSPACE:
7502: case OP_VSPACE:
7503: case OP_EXTUNI:
7504: case OP_EODN:
7505: case OP_EOD:
7506: case OP_CIRC:
7507: case OP_CIRCM:
7508: case OP_DOLL:
7509: case OP_DOLLM:
7510: case OP_NOT:
7511: case OP_NOTI:
7512: case OP_REVERSE:
1.1.1.4 misho 7513: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
1.1 misho 7514: break;
7515:
7516: case OP_SET_SOM:
1.1.1.3 misho 7517: PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
7518: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1.1 misho 7519: allocate_stack(common, 1);
7520: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
1.1.1.3 misho 7521: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
1.1 misho 7522: cc++;
7523: break;
7524:
7525: case OP_CHAR:
7526: case OP_CHARI:
1.1.1.3 misho 7527: if (common->mode == JIT_COMPILE)
1.1.1.4 misho 7528: cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
1.1.1.3 misho 7529: else
1.1.1.4 misho 7530: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
1.1 misho 7531: break;
7532:
7533: case OP_STAR:
7534: case OP_MINSTAR:
7535: case OP_PLUS:
7536: case OP_MINPLUS:
7537: case OP_QUERY:
7538: case OP_MINQUERY:
7539: case OP_UPTO:
7540: case OP_MINUPTO:
7541: case OP_EXACT:
7542: case OP_POSSTAR:
7543: case OP_POSPLUS:
7544: case OP_POSQUERY:
7545: case OP_POSUPTO:
7546: case OP_STARI:
7547: case OP_MINSTARI:
7548: case OP_PLUSI:
7549: case OP_MINPLUSI:
7550: case OP_QUERYI:
7551: case OP_MINQUERYI:
7552: case OP_UPTOI:
7553: case OP_MINUPTOI:
7554: case OP_EXACTI:
7555: case OP_POSSTARI:
7556: case OP_POSPLUSI:
7557: case OP_POSQUERYI:
7558: case OP_POSUPTOI:
7559: case OP_NOTSTAR:
7560: case OP_NOTMINSTAR:
7561: case OP_NOTPLUS:
7562: case OP_NOTMINPLUS:
7563: case OP_NOTQUERY:
7564: case OP_NOTMINQUERY:
7565: case OP_NOTUPTO:
7566: case OP_NOTMINUPTO:
7567: case OP_NOTEXACT:
7568: case OP_NOTPOSSTAR:
7569: case OP_NOTPOSPLUS:
7570: case OP_NOTPOSQUERY:
7571: case OP_NOTPOSUPTO:
7572: case OP_NOTSTARI:
7573: case OP_NOTMINSTARI:
7574: case OP_NOTPLUSI:
7575: case OP_NOTMINPLUSI:
7576: case OP_NOTQUERYI:
7577: case OP_NOTMINQUERYI:
7578: case OP_NOTUPTOI:
7579: case OP_NOTMINUPTOI:
7580: case OP_NOTEXACTI:
7581: case OP_NOTPOSSTARI:
7582: case OP_NOTPOSPLUSI:
7583: case OP_NOTPOSQUERYI:
7584: case OP_NOTPOSUPTOI:
7585: case OP_TYPESTAR:
7586: case OP_TYPEMINSTAR:
7587: case OP_TYPEPLUS:
7588: case OP_TYPEMINPLUS:
7589: case OP_TYPEQUERY:
7590: case OP_TYPEMINQUERY:
7591: case OP_TYPEUPTO:
7592: case OP_TYPEMINUPTO:
7593: case OP_TYPEEXACT:
7594: case OP_TYPEPOSSTAR:
7595: case OP_TYPEPOSPLUS:
7596: case OP_TYPEPOSQUERY:
7597: case OP_TYPEPOSUPTO:
1.1.1.4 misho 7598: cc = compile_iterator_matchingpath(common, cc, parent);
1.1 misho 7599: break;
7600:
7601: case OP_CLASS:
7602: case OP_NCLASS:
1.1.1.5 ! misho 7603: if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
1.1.1.4 misho 7604: cc = compile_iterator_matchingpath(common, cc, parent);
1.1 misho 7605: else
1.1.1.4 misho 7606: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
1.1 misho 7607: break;
7608:
1.1.1.4 misho 7609: #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1 misho 7610: case OP_XCLASS:
1.1.1.5 ! misho 7611: if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
1.1.1.4 misho 7612: cc = compile_iterator_matchingpath(common, cc, parent);
1.1 misho 7613: else
1.1.1.4 misho 7614: cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
1.1 misho 7615: break;
7616: #endif
7617:
7618: case OP_REF:
7619: case OP_REFI:
1.1.1.5 ! misho 7620: if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
1.1.1.4 misho 7621: cc = compile_ref_iterator_matchingpath(common, cc, parent);
1.1 misho 7622: else
1.1.1.5 ! misho 7623: {
! 7624: compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
! 7625: cc += 1 + IMM2_SIZE;
! 7626: }
! 7627: break;
! 7628:
! 7629: case OP_DNREF:
! 7630: case OP_DNREFI:
! 7631: if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
! 7632: cc = compile_ref_iterator_matchingpath(common, cc, parent);
! 7633: else
! 7634: {
! 7635: compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
! 7636: compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
! 7637: cc += 1 + 2 * IMM2_SIZE;
! 7638: }
1.1 misho 7639: break;
7640:
7641: case OP_RECURSE:
1.1.1.4 misho 7642: cc = compile_recurse_matchingpath(common, cc, parent);
7643: break;
7644:
7645: case OP_CALLOUT:
7646: cc = compile_callout_matchingpath(common, cc, parent);
1.1 misho 7647: break;
7648:
7649: case OP_ASSERT:
7650: case OP_ASSERT_NOT:
7651: case OP_ASSERTBACK:
7652: case OP_ASSERTBACK_NOT:
1.1.1.3 misho 7653: PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
1.1.1.4 misho 7654: cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
1.1 misho 7655: break;
7656:
7657: case OP_BRAMINZERO:
1.1.1.3 misho 7658: PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
1.1 misho 7659: cc = bracketend(cc + 1);
7660: if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
7661: {
7662: allocate_stack(common, 1);
7663: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7664: }
7665: else
7666: {
7667: allocate_stack(common, 2);
7668: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7669: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
7670: }
1.1.1.4 misho 7671: BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
1.1 misho 7672: if (cc[1] > OP_ASSERTBACK_NOT)
1.1.1.4 misho 7673: count_match(common);
1.1 misho 7674: break;
7675:
7676: case OP_ONCE:
7677: case OP_ONCE_NC:
7678: case OP_BRA:
7679: case OP_CBRA:
7680: case OP_COND:
7681: case OP_SBRA:
7682: case OP_SCBRA:
7683: case OP_SCOND:
1.1.1.4 misho 7684: cc = compile_bracket_matchingpath(common, cc, parent);
1.1 misho 7685: break;
7686:
7687: case OP_BRAZERO:
7688: if (cc[1] > OP_ASSERTBACK_NOT)
1.1.1.4 misho 7689: cc = compile_bracket_matchingpath(common, cc, parent);
1.1 misho 7690: else
7691: {
1.1.1.3 misho 7692: PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
1.1.1.4 misho 7693: cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
1.1 misho 7694: }
7695: break;
7696:
7697: case OP_BRAPOS:
7698: case OP_CBRAPOS:
7699: case OP_SBRAPOS:
7700: case OP_SCBRAPOS:
7701: case OP_BRAPOSZERO:
1.1.1.4 misho 7702: cc = compile_bracketpos_matchingpath(common, cc, parent);
1.1.1.3 misho 7703: break;
7704:
7705: case OP_MARK:
7706: PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
7707: SLJIT_ASSERT(common->mark_ptr != 0);
7708: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1.1.1.4 misho 7709: allocate_stack(common, common->has_skip_arg ? 5 : 1);
1.1.1.3 misho 7710: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1.1.1.4 misho 7711: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
7712: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
1.1.1.3 misho 7713: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
7714: OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
1.1.1.4 misho 7715: if (common->has_skip_arg)
7716: {
7717: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7718: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
7719: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
7720: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
7721: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
7722: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7723: }
1.1.1.3 misho 7724: cc += 1 + 2 + cc[1];
7725: break;
7726:
1.1.1.4 misho 7727: case OP_PRUNE:
7728: case OP_PRUNE_ARG:
7729: case OP_SKIP:
7730: case OP_SKIP_ARG:
7731: case OP_THEN:
7732: case OP_THEN_ARG:
1.1.1.3 misho 7733: case OP_COMMIT:
1.1.1.4 misho 7734: cc = compile_control_verb_matchingpath(common, cc, parent);
1.1 misho 7735: break;
7736:
7737: case OP_FAIL:
7738: case OP_ACCEPT:
7739: case OP_ASSERT_ACCEPT:
1.1.1.4 misho 7740: cc = compile_fail_accept_matchingpath(common, cc, parent);
1.1 misho 7741: break;
7742:
7743: case OP_CLOSE:
1.1.1.4 misho 7744: cc = compile_close_matchingpath(common, cc);
1.1 misho 7745: break;
7746:
7747: case OP_SKIPZERO:
7748: cc = bracketend(cc + 1);
7749: break;
7750:
7751: default:
7752: SLJIT_ASSERT_STOP();
7753: return;
7754: }
7755: if (cc == NULL)
7756: return;
7757: }
1.1.1.4 misho 7758:
7759: if (has_then_trap)
7760: {
7761: /* Head item on backtrack. */
7762: PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
7763: BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
7764: BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
7765: common->then_trap = save_then_trap;
7766: }
1.1 misho 7767: SLJIT_ASSERT(cc == ccend);
7768: }
7769:
/* The helper macros used by compile_matchingpath above are retired here,
so the backtracking path generators below cannot pick them up. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Generates the backtracking path for the given backtrack item and leaves
the enclosing function (which must return void) as soon as the sljit
compiler reports an error. Expects `common` and `compiler` to be in scope
at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the `current` variable of the enclosing scope as a pointer to the
given backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
1.1 misho 7784:
1.1.1.4 misho 7785: static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 7786: {
7787: DEFINE_COMPILER;
1.1.1.2 misho 7788: pcre_uchar *cc = current->cc;
7789: pcre_uchar opcode;
7790: pcre_uchar type;
1.1.1.5 ! misho 7791: int max = -1, min = -1;
1.1 misho 7792: struct sljit_label *label = NULL;
7793: struct sljit_jump *jump = NULL;
1.1.1.3 misho 7794: jump_list *jumplist = NULL;
1.1.1.4 misho 7795: int private_data_ptr = PRIVATE_DATA(cc);
7796: int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
7797: int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7798: int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
1.1 misho 7799:
1.1.1.5 ! misho 7800: cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
1.1 misho 7801:
7802: switch(opcode)
7803: {
7804: case OP_STAR:
7805: case OP_PLUS:
7806: case OP_UPTO:
7807: case OP_CRRANGE:
7808: if (type == OP_ANYNL || type == OP_EXTUNI)
7809: {
1.1.1.4 misho 7810: SLJIT_ASSERT(private_data_ptr == 0);
1.1.1.3 misho 7811: set_jumps(current->topbacktracks, LABEL());
1.1 misho 7812: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7813: free_stack(common, 1);
1.1.1.4 misho 7814: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1 misho 7815: }
7816: else
7817: {
1.1.1.4 misho 7818: if (opcode == OP_UPTO)
1.1.1.5 ! misho 7819: min = 0;
1.1.1.4 misho 7820: if (opcode <= OP_PLUS)
7821: {
7822: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7823: jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
7824: }
7825: else
7826: {
7827: OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7828: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
1.1.1.5 ! misho 7829: jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
1.1.1.4 misho 7830: OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
7831: }
1.1 misho 7832: skip_char_back(common);
1.1.1.4 misho 7833: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7834: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1.1.3 misho 7835: if (opcode == OP_CRRANGE)
7836: set_jumps(current->topbacktracks, LABEL());
1.1 misho 7837: JUMPHERE(jump);
1.1.1.4 misho 7838: if (private_data_ptr == 0)
7839: free_stack(common, 2);
1.1.1.3 misho 7840: if (opcode == OP_PLUS)
7841: set_jumps(current->topbacktracks, LABEL());
1.1 misho 7842: }
7843: break;
7844:
7845: case OP_MINSTAR:
7846: case OP_MINPLUS:
1.1.1.4 misho 7847: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7848: compile_char1_matchingpath(common, type, cc, &jumplist);
7849: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7850: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1.1.3 misho 7851: set_jumps(jumplist, LABEL());
1.1.1.4 misho 7852: if (private_data_ptr == 0)
7853: free_stack(common, 1);
1.1.1.3 misho 7854: if (opcode == OP_MINPLUS)
7855: set_jumps(current->topbacktracks, LABEL());
1.1 misho 7856: break;
7857:
7858: case OP_MINUPTO:
7859: case OP_CRMINRANGE:
7860: if (opcode == OP_CRMINRANGE)
7861: {
7862: label = LABEL();
1.1.1.3 misho 7863: set_jumps(current->topbacktracks, label);
1.1 misho 7864: }
1.1.1.4 misho 7865: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7866: compile_char1_matchingpath(common, type, cc, &jumplist);
1.1 misho 7867:
1.1.1.4 misho 7868: OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7869: OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
1.1 misho 7870: OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1.1.1.4 misho 7871: OP1(SLJIT_MOV, base, offset1, TMP1, 0);
1.1 misho 7872:
7873: if (opcode == OP_CRMINRANGE)
1.1.1.5 ! misho 7874: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
1.1 misho 7875:
1.1.1.5 ! misho 7876: if (opcode == OP_CRMINRANGE && max == 0)
1.1.1.4 misho 7877: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1 misho 7878: else
1.1.1.5 ! misho 7879: CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1 misho 7880:
1.1.1.3 misho 7881: set_jumps(jumplist, LABEL());
1.1.1.4 misho 7882: if (private_data_ptr == 0)
7883: free_stack(common, 2);
1.1 misho 7884: break;
7885:
7886: case OP_QUERY:
1.1.1.4 misho 7887: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7888: OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
7889: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1 misho 7890: jump = JUMP(SLJIT_JUMP);
1.1.1.3 misho 7891: set_jumps(current->topbacktracks, LABEL());
1.1.1.4 misho 7892: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7893: OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
7894: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1 misho 7895: JUMPHERE(jump);
1.1.1.4 misho 7896: if (private_data_ptr == 0)
7897: free_stack(common, 1);
1.1 misho 7898: break;
7899:
7900: case OP_MINQUERY:
1.1.1.4 misho 7901: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7902: OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
1.1 misho 7903: jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
1.1.1.4 misho 7904: compile_char1_matchingpath(common, type, cc, &jumplist);
7905: JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1.1.3 misho 7906: set_jumps(jumplist, LABEL());
1.1 misho 7907: JUMPHERE(jump);
1.1.1.4 misho 7908: if (private_data_ptr == 0)
7909: free_stack(common, 1);
1.1 misho 7910: break;
7911:
7912: case OP_EXACT:
7913: case OP_POSPLUS:
1.1.1.5 ! misho 7914: case OP_CRPOSRANGE:
1.1.1.3 misho 7915: set_jumps(current->topbacktracks, LABEL());
1.1 misho 7916: break;
7917:
7918: case OP_POSSTAR:
7919: case OP_POSQUERY:
7920: case OP_POSUPTO:
7921: break;
7922:
7923: default:
7924: SLJIT_ASSERT_STOP();
7925: break;
7926: }
7927: }
7928:
1.1.1.4 misho 7929: static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 7930: {
7931: DEFINE_COMPILER;
1.1.1.2 misho 7932: pcre_uchar *cc = current->cc;
1.1.1.5 ! misho 7933: BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
1.1.1.2 misho 7934: pcre_uchar type;
1.1 misho 7935:
1.1.1.5 ! misho 7936: type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
! 7937:
1.1 misho 7938: if ((type & 0x1) == 0)
7939: {
1.1.1.5 ! misho 7940: /* Maximize case. */
1.1.1.3 misho 7941: set_jumps(current->topbacktracks, LABEL());
1.1 misho 7942: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7943: free_stack(common, 1);
1.1.1.4 misho 7944: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1 misho 7945: return;
7946: }
7947:
7948: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
1.1.1.4 misho 7949: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
1.1.1.3 misho 7950: set_jumps(current->topbacktracks, LABEL());
1.1.1.5 ! misho 7951: free_stack(common, ref ? 2 : 3);
1.1 misho 7952: }
7953:
1.1.1.4 misho 7954: static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 7955: {
7956: DEFINE_COMPILER;
7957:
1.1.1.4 misho 7958: if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
7959: compile_backtrackingpath(common, current->top);
1.1.1.3 misho 7960: set_jumps(current->topbacktracks, LABEL());
1.1.1.4 misho 7961: if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
7962: return;
1.1.1.3 misho 7963:
7964: if (common->has_set_som && common->mark_ptr != 0)
7965: {
7966: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7967: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7968: free_stack(common, 2);
7969: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
7970: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
7971: }
7972: else if (common->has_set_som || common->mark_ptr != 0)
7973: {
7974: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7975: free_stack(common, 1);
7976: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
7977: }
1.1 misho 7978: }
7979:
1.1.1.4 misho 7980: static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 7981: {
7982: DEFINE_COMPILER;
1.1.1.2 misho 7983: pcre_uchar *cc = current->cc;
7984: pcre_uchar bra = OP_BRA;
1.1 misho 7985: struct sljit_jump *brajump = NULL;
7986:
7987: SLJIT_ASSERT(*cc != OP_BRAMINZERO);
7988: if (*cc == OP_BRAZERO)
7989: {
7990: bra = *cc;
7991: cc++;
7992: }
7993:
7994: if (bra == OP_BRAZERO)
7995: {
1.1.1.3 misho 7996: SLJIT_ASSERT(current->topbacktracks == NULL);
1.1 misho 7997: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7998: }
7999:
1.1.1.3 misho 8000: if (CURRENT_AS(assert_backtrack)->framesize < 0)
1.1 misho 8001: {
1.1.1.3 misho 8002: set_jumps(current->topbacktracks, LABEL());
1.1 misho 8003:
8004: if (bra == OP_BRAZERO)
8005: {
8006: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
1.1.1.4 misho 8007: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
1.1 misho 8008: free_stack(common, 1);
8009: }
8010: return;
8011: }
8012:
8013: if (bra == OP_BRAZERO)
8014: {
8015: if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
8016: {
8017: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
1.1.1.4 misho 8018: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
1.1 misho 8019: free_stack(common, 1);
8020: return;
8021: }
8022: free_stack(common, 1);
8023: brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8024: }
8025:
8026: if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
8027: {
1.1.1.4 misho 8028: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr);
1.1 misho 8029: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 8030: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
1.1 misho 8031:
1.1.1.3 misho 8032: set_jumps(current->topbacktracks, LABEL());
1.1 misho 8033: }
8034: else
1.1.1.3 misho 8035: set_jumps(current->topbacktracks, LABEL());
1.1 misho 8036:
8037: if (bra == OP_BRAZERO)
8038: {
8039: /* We know there is enough place on the stack. */
1.1.1.4 misho 8040: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
1.1 misho 8041: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
1.1.1.4 misho 8042: JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
1.1 misho 8043: JUMPHERE(brajump);
8044: }
8045: }
8046:
1.1.1.4 misho 8047: static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 8048: {
8049: DEFINE_COMPILER;
1.1.1.4 misho 8050: int opcode, stacksize, count;
1.1 misho 8051: int offset = 0;
1.1.1.4 misho 8052: int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8053: int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
1.1.1.2 misho 8054: pcre_uchar *cc = current->cc;
8055: pcre_uchar *ccbegin;
8056: pcre_uchar *ccprev;
1.1 misho 8057: jump_list *jumplist = NULL;
8058: jump_list *jumplistitem = NULL;
1.1.1.2 misho 8059: pcre_uchar bra = OP_BRA;
8060: pcre_uchar ket;
1.1.1.3 misho 8061: assert_backtrack *assert;
1.1 misho 8062: BOOL has_alternatives;
1.1.1.4 misho 8063: BOOL needs_control_head = FALSE;
1.1 misho 8064: struct sljit_jump *brazero = NULL;
8065: struct sljit_jump *once = NULL;
8066: struct sljit_jump *cond = NULL;
1.1.1.4 misho 8067: struct sljit_label *rmin_label = NULL;
8068: struct sljit_label *exact_label = NULL;
1.1 misho 8069:
8070: if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8071: {
8072: bra = *cc;
8073: cc++;
8074: }
8075:
8076: opcode = *cc;
1.1.1.4 misho 8077: ccbegin = bracketend(cc) - 1 - LINK_SIZE;
8078: ket = *ccbegin;
8079: if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8080: {
8081: repeat_ptr = PRIVATE_DATA(ccbegin);
8082: repeat_type = PRIVATE_DATA(ccbegin + 2);
8083: repeat_count = PRIVATE_DATA(ccbegin + 3);
8084: SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
8085: if (repeat_type == OP_UPTO)
8086: ket = OP_KETRMAX;
8087: if (repeat_type == OP_MINUPTO)
8088: ket = OP_KETRMIN;
8089: }
1.1 misho 8090: ccbegin = cc;
8091: cc += GET(cc, 1);
8092: has_alternatives = *cc == OP_ALT;
8093: if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
1.1.1.3 misho 8094: has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
1.1 misho 8095: if (opcode == OP_CBRA || opcode == OP_SCBRA)
8096: offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
8097: if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8098: opcode = OP_SCOND;
8099: if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8100: opcode = OP_ONCE;
8101:
1.1.1.4 misho 8102: /* Decoding the needs_control_head in framesize. */
8103: if (opcode == OP_ONCE)
8104: {
8105: needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8106: CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
8107: }
8108:
8109: if (ket != OP_KET && repeat_type != 0)
8110: {
8111: /* TMP1 is used in OP_KETRMIN below. */
8112: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8113: free_stack(common, 1);
8114: if (repeat_type == OP_UPTO)
8115: OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8116: else
8117: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
8118: }
8119:
1.1 misho 8120: if (ket == OP_KETRMAX)
8121: {
1.1.1.3 misho 8122: if (bra == OP_BRAZERO)
1.1 misho 8123: {
8124: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8125: free_stack(common, 1);
8126: brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8127: }
8128: }
8129: else if (ket == OP_KETRMIN)
8130: {
8131: if (bra != OP_BRAMINZERO)
8132: {
8133: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
1.1.1.4 misho 8134: if (repeat_type != 0)
8135: {
8136: /* TMP1 was set a few lines above. */
8137: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8138: /* Drop STR_PTR for non-greedy plus quantifier. */
8139: if (opcode != OP_ONCE)
8140: free_stack(common, 1);
8141: }
8142: else if (opcode >= OP_SBRA || opcode == OP_ONCE)
1.1 misho 8143: {
8144: /* Checking zero-length iteration. */
1.1.1.3 misho 8145: if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
1.1.1.4 misho 8146: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
1.1 misho 8147: else
8148: {
1.1.1.4 misho 8149: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
8150: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
1.1 misho 8151: }
1.1.1.4 misho 8152: /* Drop STR_PTR for non-greedy plus quantifier. */
1.1 misho 8153: if (opcode != OP_ONCE)
8154: free_stack(common, 1);
8155: }
8156: else
1.1.1.4 misho 8157: JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
1.1 misho 8158: }
1.1.1.4 misho 8159: rmin_label = LABEL();
8160: if (repeat_type != 0)
8161: OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
1.1 misho 8162: }
8163: else if (bra == OP_BRAZERO)
8164: {
8165: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8166: free_stack(common, 1);
8167: brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8168: }
1.1.1.4 misho 8169: else if (repeat_type == OP_EXACT)
8170: {
8171: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
8172: exact_label = LABEL();
8173: }
8174:
8175: if (offset != 0)
8176: {
8177: if (common->capture_last_ptr != 0)
8178: {
8179: SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8180: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8181: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8182: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
8183: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8184: free_stack(common, 3);
8185: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0);
8186: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
8187: }
8188: else if (common->optimized_cbracket[offset >> 1] == 0)
8189: {
8190: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8191: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8192: free_stack(common, 2);
8193: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
8194: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
8195: }
8196: }
1.1 misho 8197:
8198: if (SLJIT_UNLIKELY(opcode == OP_ONCE))
8199: {
1.1.1.3 misho 8200: if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
1.1 misho 8201: {
1.1.1.4 misho 8202: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
1.1 misho 8203: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8204: }
8205: once = JUMP(SLJIT_JUMP);
8206: }
8207: else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8208: {
8209: if (has_alternatives)
8210: {
8211: /* Always exactly one alternative. */
8212: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8213: free_stack(common, 1);
8214:
8215: jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
8216: if (SLJIT_UNLIKELY(!jumplistitem))
8217: return;
8218: jumplist = jumplistitem;
8219: jumplistitem->next = NULL;
8220: jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
8221: }
8222: }
8223: else if (*cc == OP_ALT)
8224: {
8225: /* Build a jump list. Get the last successfully matched branch index. */
8226: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8227: free_stack(common, 1);
8228: count = 1;
8229: do
8230: {
8231: /* Append as the last item. */
8232: if (jumplist != NULL)
8233: {
8234: jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
8235: jumplistitem = jumplistitem->next;
8236: }
8237: else
8238: {
8239: jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
8240: jumplist = jumplistitem;
8241: }
8242:
8243: if (SLJIT_UNLIKELY(!jumplistitem))
8244: return;
8245:
8246: jumplistitem->next = NULL;
8247: jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
8248: cc += GET(cc, 1);
8249: }
8250: while (*cc == OP_ALT);
8251:
8252: cc = ccbegin + GET(ccbegin, 1);
8253: }
8254:
1.1.1.4 misho 8255: COMPILE_BACKTRACKINGPATH(current->top);
1.1.1.3 misho 8256: if (current->topbacktracks)
8257: set_jumps(current->topbacktracks, LABEL());
1.1 misho 8258:
8259: if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8260: {
8261: /* Conditional block always has at most one alternative. */
8262: if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
8263: {
8264: SLJIT_ASSERT(has_alternatives);
1.1.1.3 misho 8265: assert = CURRENT_AS(bracket_backtrack)->u.assert;
1.1 misho 8266: if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
8267: {
1.1.1.4 misho 8268: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
1.1 misho 8269: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 8270: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
1.1 misho 8271: }
8272: cond = JUMP(SLJIT_JUMP);
1.1.1.3 misho 8273: set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
1.1 misho 8274: }
1.1.1.3 misho 8275: else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
1.1 misho 8276: {
8277: SLJIT_ASSERT(has_alternatives);
8278: cond = JUMP(SLJIT_JUMP);
1.1.1.3 misho 8279: set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
1.1 misho 8280: }
8281: else
8282: SLJIT_ASSERT(!has_alternatives);
8283: }
8284:
8285: if (has_alternatives)
8286: {
8287: count = 1;
8288: do
8289: {
8290: current->top = NULL;
1.1.1.3 misho 8291: current->topbacktracks = NULL;
8292: current->nextbacktracks = NULL;
1.1.1.4 misho 8293: /* Conditional blocks always have an additional alternative, even if it is empty. */
1.1 misho 8294: if (*cc == OP_ALT)
8295: {
8296: ccprev = cc + 1 + LINK_SIZE;
8297: cc += GET(cc, 1);
8298: if (opcode != OP_COND && opcode != OP_SCOND)
8299: {
1.1.1.4 misho 8300: if (opcode != OP_ONCE)
8301: {
8302: if (private_data_ptr != 0)
8303: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
8304: else
8305: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8306: }
1.1 misho 8307: else
1.1.1.4 misho 8308: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
1.1 misho 8309: }
1.1.1.4 misho 8310: compile_matchingpath(common, ccprev, cc, current);
1.1 misho 8311: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8312: return;
8313: }
8314:
1.1.1.4 misho 8315: /* Instructions after the current alternative is successfully matched. */
8316: /* There is a similar code in compile_bracket_matchingpath. */
1.1 misho 8317: if (opcode == OP_ONCE)
1.1.1.4 misho 8318: match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
1.1 misho 8319:
8320: stacksize = 0;
1.1.1.4 misho 8321: if (repeat_type == OP_MINUPTO)
8322: {
8323: /* We need to preserve the counter. TMP2 will be used below. */
8324: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
1.1 misho 8325: stacksize++;
1.1.1.4 misho 8326: }
1.1 misho 8327: if (ket != OP_KET || bra != OP_BRA)
8328: stacksize++;
1.1.1.4 misho 8329: if (offset != 0)
8330: {
8331: if (common->capture_last_ptr != 0)
8332: stacksize++;
8333: if (common->optimized_cbracket[offset >> 1] == 0)
8334: stacksize += 2;
8335: }
8336: if (opcode != OP_ONCE)
8337: stacksize++;
1.1 misho 8338:
1.1.1.4 misho 8339: if (stacksize > 0)
8340: allocate_stack(common, stacksize);
1.1 misho 8341:
8342: stacksize = 0;
1.1.1.4 misho 8343: if (repeat_type == OP_MINUPTO)
8344: {
8345: /* TMP2 was set above. */
8346: OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8347: stacksize++;
8348: }
8349:
1.1 misho 8350: if (ket != OP_KET || bra != OP_BRA)
8351: {
8352: if (ket != OP_KET)
8353: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8354: else
8355: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8356: stacksize++;
8357: }
8358:
1.1.1.4 misho 8359: if (offset != 0)
8360: stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8361:
1.1 misho 8362: if (opcode != OP_ONCE)
8363: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
8364:
1.1.1.4 misho 8365: if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
1.1 misho 8366: {
1.1.1.4 misho 8367: /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
8368: SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
1.1 misho 8369: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
8370: }
8371:
1.1.1.4 misho 8372: JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
1.1 misho 8373:
8374: if (opcode != OP_ONCE)
8375: {
8376: SLJIT_ASSERT(jumplist);
8377: JUMPHERE(jumplist->jump);
8378: jumplist = jumplist->next;
8379: }
8380:
1.1.1.4 misho 8381: COMPILE_BACKTRACKINGPATH(current->top);
1.1.1.3 misho 8382: if (current->topbacktracks)
8383: set_jumps(current->topbacktracks, LABEL());
8384: SLJIT_ASSERT(!current->nextbacktracks);
1.1 misho 8385: }
8386: while (*cc == OP_ALT);
8387: SLJIT_ASSERT(!jumplist);
8388:
8389: if (cond != NULL)
8390: {
8391: SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
1.1.1.3 misho 8392: assert = CURRENT_AS(bracket_backtrack)->u.assert;
1.1 misho 8393: if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
8394: {
1.1.1.4 misho 8395: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
1.1 misho 8396: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 8397: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
1.1 misho 8398: }
8399: JUMPHERE(cond);
8400: }
8401:
8402: /* Free the STR_PTR. */
1.1.1.4 misho 8403: if (private_data_ptr == 0)
1.1 misho 8404: free_stack(common, 1);
8405: }
8406:
8407: if (offset != 0)
8408: {
8409: /* Using both tmp registers is better for instruction scheduling. */
1.1.1.4 misho 8410: if (common->optimized_cbracket[offset >> 1] != 0)
8411: {
8412: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8413: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8414: free_stack(common, 2);
8415: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
8416: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
8417: }
8418: else
8419: {
8420: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8421: free_stack(common, 1);
8422: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
8423: }
1.1 misho 8424: }
8425: else if (opcode == OP_SBRA || opcode == OP_SCOND)
8426: {
1.1.1.4 misho 8427: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
1.1 misho 8428: free_stack(common, 1);
8429: }
8430: else if (opcode == OP_ONCE)
8431: {
8432: cc = ccbegin + GET(ccbegin, 1);
1.1.1.4 misho 8433: stacksize = needs_control_head ? 1 : 0;
8434:
1.1.1.3 misho 8435: if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
1.1 misho 8436: {
8437: /* Reset head and drop saved frame. */
1.1.1.4 misho 8438: stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
1.1 misho 8439: }
8440: else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
8441: {
8442: /* The STR_PTR must be released. */
1.1.1.4 misho 8443: stacksize++;
1.1 misho 8444: }
1.1.1.4 misho 8445: free_stack(common, stacksize);
1.1 misho 8446:
8447: JUMPHERE(once);
1.1.1.4 misho 8448: /* Restore previous private_data_ptr */
1.1.1.3 misho 8449: if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
1.1.1.4 misho 8450: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
1.1 misho 8451: else if (ket == OP_KETRMIN)
8452: {
8453: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8454: /* See the comment below. */
8455: free_stack(common, 2);
1.1.1.4 misho 8456: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
1.1 misho 8457: }
8458: }
8459:
1.1.1.4 misho 8460: if (repeat_type == OP_EXACT)
8461: {
8462: OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
8463: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
8464: CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
8465: }
8466: else if (ket == OP_KETRMAX)
1.1 misho 8467: {
8468: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
1.1.1.3 misho 8469: if (bra != OP_BRAZERO)
8470: free_stack(common, 1);
1.1.1.4 misho 8471:
8472: CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
1.1 misho 8473: if (bra == OP_BRAZERO)
8474: {
8475: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
1.1.1.4 misho 8476: JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
1.1 misho 8477: JUMPHERE(brazero);
1.1.1.3 misho 8478: free_stack(common, 1);
1.1 misho 8479: }
8480: }
8481: else if (ket == OP_KETRMIN)
8482: {
8483: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8484:
1.1.1.3 misho 8485: /* OP_ONCE removes everything in case of a backtrack, so we don't
1.1 misho 8486: need to explicitly release the STR_PTR. The extra release would
8487: badly affect the free_stack(2) above. */
8488: if (opcode != OP_ONCE)
8489: free_stack(common, 1);
1.1.1.4 misho 8490: CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
1.1 misho 8491: if (opcode == OP_ONCE)
8492: free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
8493: else if (bra == OP_BRAMINZERO)
8494: free_stack(common, 1);
8495: }
8496: else if (bra == OP_BRAZERO)
8497: {
8498: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
1.1.1.4 misho 8499: JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
1.1 misho 8500: JUMPHERE(brazero);
8501: }
8502: }
8503:
/* Emits the backtracking path of a possessive bracket. compile_backtrackingpath
   dispatches here for OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and
   OP_BRAPOSZERO.

   common  - compiler state (locals layout, shared jump lists).
   current - the backtrack record; it is reinterpreted as a bracketpos_backtrack
             through CURRENT_AS() to reach framesize, stacksize and
             private_data_ptr.

   Two shapes are generated, selected by the sign of framesize. */
1.1.1.4 misho 8504: static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 8505: {
8506: DEFINE_COMPILER;
8507: int offset;
8508: struct sljit_jump *jump;
8509:
/* Negative framesize: only the values saved on the stack have to be restored. */
1.1.1.3 misho 8510: if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
1.1 misho 8511: {
8512: if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
8513: {
/* The 16 bit value read after the link field is doubled to index the ovector pair. */
8514: offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
/* Stack slots 0 and 1 are written back to OVECTOR(offset) and OVECTOR(offset + 1).
   When capture_last_ptr is in use, slot 2 is written back to that local as well.
   The loads and stores are interleaved, so TMP1 is reused for the third value. */
8515: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8516: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8517: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
1.1.1.4 misho 8518: if (common->capture_last_ptr != 0)
8519: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
1.1 misho 8520: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
1.1.1.4 misho 8521: if (common->capture_last_ptr != 0)
8522: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
1.1 misho 8523: }
/* Pending backtrack jumps land after the restore; then the whole stack area is released. */
1.1.1.3 misho 8524: set_jumps(current->topbacktracks, LABEL());
8525: free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
1.1 misho 8526: return;
8527: }
8528:
/* Non-negative framesize: STACK_TOP is reloaded from the private data local and
   the shared revertframes routine is invoked with a fast call. */
1.1.1.4 misho 8529: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
1.1 misho 8530: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8531:
1.1.1.3 misho 8532: if (current->topbacktracks)
1.1 misho 8533: {
/* The fall-through path skips the release; only code arriving through
   topbacktracks executes free_stack before joining at JUMPHERE. */
8534: jump = JUMP(SLJIT_JUMP);
1.1.1.3 misho 8535: set_jumps(current->topbacktracks, LABEL());
1.1 misho 8536: /* Drop the stack frame. */
1.1.1.3 misho 8537: free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
1.1 misho 8538: JUMPHERE(jump);
8539: }
/* The word found framesize words above STACK_TOP is stored back into the private data local. */
1.1.1.4 misho 8540: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
1.1 misho 8541: }
8542:
/* Emits the backtracking path for OP_BRAMINZERO. Instead of undoing anything,
   this path compiles the group that follows the opcode, by calling the
   matching-path compilers by hand.

   common  - compiler state.
   current - the backtrack record of the OP_BRAMINZERO; its top, topbacktracks
             and nextbacktracks fields are cleared before use and are asserted
             to be empty again on exit. */
1.1.1.4 misho 8543: static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 8544: {
1.1.1.3 misho 8545: assert_backtrack backtrack;
1.1 misho 8546:
8547: current->top = NULL;
1.1.1.3 misho 8548: current->topbacktracks = NULL;
8549: current->nextbacktracks = NULL;
/* The opcode after OP_BRAMINZERO selects the compiler. Values above
   OP_ASSERTBACK_NOT go to the bracket compilers, the rest to the assert compiler
   (NOTE(review): this relies on the opcode numbering; confirm against the opcode table). */
1.1 misho 8550: if (current->cc[1] > OP_ASSERTBACK_NOT)
8551: {
1.1.1.4 misho 8552: /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
8553: compile_bracket_matchingpath(common, current->cc, current);
8554: compile_bracket_backtrackingpath(common, current->top);
1.1 misho 8555: }
8556: else
8557: {
/* A zeroed, stack allocated assert_backtrack stands in for a real record; only
   its cc and matchingpath fields are filled in before the call. */
1.1.1.3 misho 8558: memset(&backtrack, 0, sizeof(backtrack));
8559: backtrack.common.cc = current->cc;
1.1.1.4 misho 8560: backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
8561: /* Manual call of compile_assert_matchingpath. */
8562: compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
1.1 misho 8563: }
1.1.1.3 misho 8564: SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
1.1 misho 8565: }
8566:
/* Emits the code executed when matching backtracks onto a control verb.
   compile_backtrackingpath dispatches here for OP_THEN, OP_THEN_ARG, OP_PRUNE,
   OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG.

   common  - compiler state (then_trap, positive_assert, local_exit, quit and
             reset_match jump lists, control_head_ptr).
   current - the backtrack record; *current->cc is the verb opcode.

   The checks are tried in order and each one returns after emitting its jump:
   THEN inside a then trap, THEN inside a positive assert, local exit,
   SKIP with argument, and finally the generic reset_match jump. */
1.1.1.4 misho 8567: static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8568: {
8569: DEFINE_COMPILER;
8570: pcre_uchar opcode = *current->cc;
8571: struct sljit_label *loop;
8572: struct sljit_jump *jump;
8573:
8574: if (opcode == OP_THEN || opcode == OP_THEN_ARG)
8575: {
8576: if (common->then_trap != NULL)
8577: {
8578: SLJIT_ASSERT(common->control_head_ptr != 0);
8579:
/* Walk the chain that starts at the control head. TMP1 and TMP2 hold the two
   values an entry must contain to be the trap we are looking for. */
8580: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
8581: OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
8582: OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
/* The first iteration skips the link load and tests the head entry directly. */
8583: jump = JUMP(SLJIT_JUMP);
8584:
8585: loop = LABEL();
/* Follow the link stored one word below the current entry. */
8586: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
8587: JUMPHERE(jump);
/* Keep looping until both the word at -2 (type) and the word at -3 (start) match. */
8588: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
8589: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
/* Found: continue at the quit list of the enclosing then trap. */
8590: add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
8591: return;
8592: }
8593: else if (common->positive_assert)
8594: {
8595: add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
8596: return;
8597: }
8598: }
8599:
/* With local_exit set, every remaining verb leaves through the quit label,
   or through the quit jump list when the label does not exist yet. */
8600: if (common->local_exit)
8601: {
8602: if (common->quit_label == NULL)
8603: add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
8604: else
8605: JUMPTO(SLJIT_JUMP, common->quit_label);
8606: return;
8607: }
8608:
8609: if (opcode == OP_SKIP_ARG)
8610: {
8611: SLJIT_ASSERT(common->control_head_ptr != 0);
/* Set up and call do_search_mark. STACK_TOP is saved in LOCALS0 around the call
   because the register is reused to pass the address current->cc + 2.
   NOTE(review): TMP1 and STACK_TOP presumably alias the two SLJIT_CALL2 argument
   registers, and TMP1 the return register - confirm against the register defines. */
8612: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
8613: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
8614: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
8615: sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
8616: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
8617:
/* TMP1 becomes the new STR_PTR; reset_match is taken unless the value is -1.
   When it is -1 the generated code simply falls through. */
8618: OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
8619: add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
8620: return;
8621: }
8622:
/* OP_SKIP restarts from the position saved in stack slot 0; the other verbs
   reaching this point pass 0 in STR_PTR to reset_match. */
8623: if (opcode == OP_SKIP)
8624: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8625: else
8626: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
8627: add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
8628: }
8629:
/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode.

   common  - compiler state (then_trap, control_head_ptr, revertframes).
   current - the backtrack record, read as a then_trap_backtrack.

   If the record carries a non-NULL then_trap pointer, no code is emitted:
   common->then_trap is set to that pointer and the function returns.
   Otherwise two entry paths are generated which join before the final store:
   the ordinary backtrack, and the path reached through the record's quit list. */
8630: static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8631: {
8632: DEFINE_COMPILER;
8633: struct sljit_jump *jump;
8634: int size;
8635:
8636: if (CURRENT_AS(then_trap_backtrack)->then_trap)
8637: {
8638: common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
8639: return;
8640: }
8641:
/* Three words are always released, plus the frame when framesize is non-negative. */
8642: size = CURRENT_AS(then_trap_backtrack)->framesize;
8643: size = 3 + (size < 0 ? 0 : size);
8644:
/* Ordinary backtrack: fetch the word at slot size - 3 into TMP1, release
   everything and skip over the quit path. */
8645: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
8646: free_stack(common, size);
8647: jump = JUMP(SLJIT_JUMP);
8648:
/* Quit path: the frame, if any, is undone by revertframes, so only the three
   trap words remain to be released here. */
8649: set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
8650: /* STACK_TOP is set by THEN. */
8651: if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
8652: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8653: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8654: free_stack(common, 3);
8655:
/* Both paths arrive with the saved value in TMP1; it becomes the control head again. */
8656: JUMPHERE(jump);
8657: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
8658: }
8659:
/* Emits the backtracking code for a chain of backtrack records.

   common  - compiler state.
   current - the first record to process; the chain is followed through the
             prev links until NULL.

   For every record the pending nextbacktracks jumps are bound to the current
   position, then the opcode at current->cc selects either a dedicated helper
   or a short inline sequence. common->then_trap is saved on entry and restored
   on exit, because compile_then_trap_backtrackingpath may overwrite it. */
8660: static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
1.1 misho 8661: {
8662: DEFINE_COMPILER;
1.1.1.4 misho 8663: then_trap_backtrack *save_then_trap = common->then_trap;
1.1 misho 8664:
8665: while (current)
8666: {
1.1.1.3 misho 8667: if (current->nextbacktracks != NULL)
8668: set_jumps(current->nextbacktracks, LABEL());
1.1 misho 8669: switch(*current->cc)
8670: {
/* Pop the value saved in slot 0 and write it back to OVECTOR(0). */
8671: case OP_SET_SOM:
8672: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8673: free_stack(common, 1);
8674: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
8675: break;
8676:
/* All single item repeats and character classes share one helper. */
8677: case OP_STAR:
8678: case OP_MINSTAR:
8679: case OP_PLUS:
8680: case OP_MINPLUS:
8681: case OP_QUERY:
8682: case OP_MINQUERY:
8683: case OP_UPTO:
8684: case OP_MINUPTO:
8685: case OP_EXACT:
8686: case OP_POSSTAR:
8687: case OP_POSPLUS:
8688: case OP_POSQUERY:
8689: case OP_POSUPTO:
8690: case OP_STARI:
8691: case OP_MINSTARI:
8692: case OP_PLUSI:
8693: case OP_MINPLUSI:
8694: case OP_QUERYI:
8695: case OP_MINQUERYI:
8696: case OP_UPTOI:
8697: case OP_MINUPTOI:
8698: case OP_EXACTI:
8699: case OP_POSSTARI:
8700: case OP_POSPLUSI:
8701: case OP_POSQUERYI:
8702: case OP_POSUPTOI:
8703: case OP_NOTSTAR:
8704: case OP_NOTMINSTAR:
8705: case OP_NOTPLUS:
8706: case OP_NOTMINPLUS:
8707: case OP_NOTQUERY:
8708: case OP_NOTMINQUERY:
8709: case OP_NOTUPTO:
8710: case OP_NOTMINUPTO:
8711: case OP_NOTEXACT:
8712: case OP_NOTPOSSTAR:
8713: case OP_NOTPOSPLUS:
8714: case OP_NOTPOSQUERY:
8715: case OP_NOTPOSUPTO:
8716: case OP_NOTSTARI:
8717: case OP_NOTMINSTARI:
8718: case OP_NOTPLUSI:
8719: case OP_NOTMINPLUSI:
8720: case OP_NOTQUERYI:
8721: case OP_NOTMINQUERYI:
8722: case OP_NOTUPTOI:
8723: case OP_NOTMINUPTOI:
8724: case OP_NOTEXACTI:
8725: case OP_NOTPOSSTARI:
8726: case OP_NOTPOSPLUSI:
8727: case OP_NOTPOSQUERYI:
8728: case OP_NOTPOSUPTOI:
8729: case OP_TYPESTAR:
8730: case OP_TYPEMINSTAR:
8731: case OP_TYPEPLUS:
8732: case OP_TYPEMINPLUS:
8733: case OP_TYPEQUERY:
8734: case OP_TYPEMINQUERY:
8735: case OP_TYPEUPTO:
8736: case OP_TYPEMINUPTO:
8737: case OP_TYPEEXACT:
8738: case OP_TYPEPOSSTAR:
8739: case OP_TYPEPOSPLUS:
8740: case OP_TYPEPOSQUERY:
8741: case OP_TYPEPOSUPTO:
8742: case OP_CLASS:
8743: case OP_NCLASS:
1.1.1.2 misho 8744: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 8745: case OP_XCLASS:
1.1.1.2 misho 8746: #endif
1.1.1.4 misho 8747: compile_iterator_backtrackingpath(common, current);
1.1 misho 8748: break;
8749:
8750: case OP_REF:
8751: case OP_REFI:
1.1.1.5 ! misho 8752: case OP_DNREF:
! 8753: case OP_DNREFI:
1.1.1.4 misho 8754: compile_ref_iterator_backtrackingpath(common, current);
1.1 misho 8755: break;
8756:
8757: case OP_RECURSE:
1.1.1.4 misho 8758: compile_recurse_backtrackingpath(common, current);
1.1 misho 8759: break;
8760:
8761: case OP_ASSERT:
8762: case OP_ASSERT_NOT:
8763: case OP_ASSERTBACK:
8764: case OP_ASSERTBACK_NOT:
1.1.1.4 misho 8765: compile_assert_backtrackingpath(common, current);
1.1 misho 8766: break;
8767:
8768: case OP_ONCE:
8769: case OP_ONCE_NC:
8770: case OP_BRA:
8771: case OP_CBRA:
8772: case OP_COND:
8773: case OP_SBRA:
8774: case OP_SCBRA:
8775: case OP_SCOND:
1.1.1.4 misho 8776: compile_bracket_backtrackingpath(common, current);
1.1 misho 8777: break;
8778:
/* OP_BRAZERO prefixes either a bracket or an assertion; the following opcode decides. */
8779: case OP_BRAZERO:
8780: if (current->cc[1] > OP_ASSERTBACK_NOT)
1.1.1.4 misho 8781: compile_bracket_backtrackingpath(common, current);
1.1 misho 8782: else
1.1.1.4 misho 8783: compile_assert_backtrackingpath(common, current);
1.1 misho 8784: break;
8785:
8786: case OP_BRAPOS:
8787: case OP_CBRAPOS:
8788: case OP_SBRAPOS:
8789: case OP_SCBRAPOS:
8790: case OP_BRAPOSZERO:
1.1.1.4 misho 8791: compile_bracketpos_backtrackingpath(common, current);
1.1 misho 8792: break;
8793:
8794: case OP_BRAMINZERO:
1.1.1.4 misho 8795: compile_braminzero_backtrackingpath(common, current);
1.1.1.3 misho 8796: break;
8797:
/* Restore the mark local from the stack. When has_skip_arg is set the record is
   five words long: the mark value sits in slot 4 and slot 0 is restored into the
   control head; otherwise the record is the single word in slot 0. */
8798: case OP_MARK:
1.1.1.4 misho 8799: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
8800: if (common->has_skip_arg)
8801: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8802: free_stack(common, common->has_skip_arg ? 5 : 1);
1.1.1.3 misho 8803: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
1.1.1.4 misho 8804: if (common->has_skip_arg)
8805: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
8806: break;
8807:
8808: case OP_THEN:
8809: case OP_THEN_ARG:
8810: case OP_PRUNE:
8811: case OP_PRUNE_ARG:
8812: case OP_SKIP:
8813: case OP_SKIP_ARG:
8814: compile_control_verb_backtrackingpath(common, current);
1.1.1.3 misho 8815: break;
8816:
/* Backtracking onto COMMIT leaves through quit. The return register is loaded
   with PCRE_ERROR_NOMATCH only when local_exit is not set. */
8817: case OP_COMMIT:
1.1.1.4 misho 8818: if (!common->local_exit)
8819: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
8820: if (common->quit_label == NULL)
8821: add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
1.1.1.3 misho 8822: else
1.1.1.4 misho 8823: JUMPTO(SLJIT_JUMP, common->quit_label);
1.1 misho 8824: break;
8825:
/* Nothing to undo for these opcodes: only their pending jumps are bound here. */
1.1.1.4 misho 8826: case OP_CALLOUT:
1.1 misho 8827: case OP_FAIL:
8828: case OP_ACCEPT:
8829: case OP_ASSERT_ACCEPT:
1.1.1.3 misho 8830: set_jumps(current->topbacktracks, LABEL());
1.1 misho 8831: break;
8832:
1.1.1.4 misho 8833: case OP_THEN_TRAP:
8834: /* A virtual opcode for then traps. */
8835: compile_then_trap_backtrackingpath(common, current);
8836: break;
8837:
/* Every opcode that can own a backtrack record must be listed above. */
1.1 misho 8838: default:
8839: SLJIT_ASSERT_STOP();
8840: break;
8841: }
8842: current = current->prev;
8843: }
1.1.1.4 misho 8844: common->then_trap = save_then_trap;
1.1 misho 8845: }
8846:
/* Emits the callable routine for the recursion entry common->currententry.

   common - compiler state; currententry identifies the bracket (by its start
            offset from common->start) whose body is compiled here.

   Shape of the generated routine:
     - fast-call entry; the return address and the private data covered by the
       bracket are saved on the stack, optionally followed by a frame;
     - each alternative is compiled in turn, and success jumps to the accept list;
     - the routine ends with TMP1 = 1 when an alternative matched and TMP1 = 0
       otherwise (including exits collected through the quit list), after the
       saved data has been copied back, and returns with a fast return.

   The function returns early, leaving the routine unfinished, when the sljit
   compiler reports an error. */
8847: static SLJIT_INLINE void compile_recurse(compiler_common *common)
8848: {
8849: DEFINE_COMPILER;
1.1.1.2 misho 8850: pcre_uchar *cc = common->start + common->currententry->start;
/* Skip the opcode and the link; every bracket kind except OP_BRA also carries a 2 byte number. */
8851: pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
8852: pcre_uchar *ccend = bracketend(cc);
1.1.1.4 misho 8853: BOOL needs_control_head;
8854: int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
8855: int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
1.1 misho 8856: int alternativesize;
1.1.1.4 misho 8857: BOOL needs_frame;
1.1.1.3 misho 8858: backtrack_common altbacktrack;
1.1 misho 8859: struct sljit_jump *jump;
8860:
1.1.1.4 misho 8861: /* Recurse captures then. */
8862: common->then_trap = NULL;
8863:
1.1 misho 8864: SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means no frame; it is clamped to 0 so it can be used in the size sums below. */
1.1.1.4 misho 8865: needs_frame = framesize >= 0;
8866: if (!needs_frame)
1.1 misho 8867: framesize = 0;
/* One extra stack word is reserved when the bracket has more than one alternative. */
8868: alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
8869:
/* Define the entry label and bind every call emitted before this point to it. */
1.1.1.4 misho 8870: SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
1.1 misho 8871: common->currententry->entry = LABEL();
8872: set_jumps(common->currententry->calls, common->currententry->entry);
8873:
/* Prologue: the return address arrives in TMP2 and is stored in the highest
   indexed slot of the new stack area; the private data is copied in next to it. */
1.1.1.3 misho 8874: sljit_emit_fast_enter(compiler, TMP2, 0);
1.1.1.4 misho 8875: allocate_stack(common, private_data_size + framesize + alternativesize);
8876: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
8877: copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
8878: if (needs_control_head)
8879: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
/* Remember this STACK_TOP so the exit paths below can restore it. */
8880: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0);
8881: if (needs_frame)
8882: init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
1.1 misho 8883:
/* With several alternatives the starting STR_PTR is kept in slot 0 for the later ones. */
8884: if (alternativesize > 0)
8885: OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8886:
/* The routine collects its own quit and accept jumps, so the shared lists and labels start empty. */
1.1.1.3 misho 8887: memset(&altbacktrack, 0, sizeof(backtrack_common));
1.1.1.4 misho 8888: common->quit_label = NULL;
8889: common->accept_label = NULL;
8890: common->quit = NULL;
1.1 misho 8891: common->accept = NULL;
1.1.1.3 misho 8892: altbacktrack.cc = ccbegin;
1.1 misho 8893: cc += GET(cc, 1);
/* One iteration per alternative: cc points at the OP_ALT or closing opcode that ends it. */
8894: while (1)
8895: {
1.1.1.3 misho 8896: altbacktrack.top = NULL;
8897: altbacktrack.topbacktracks = NULL;
1.1 misho 8898:
/* Every alternative after the first restarts from the saved subject position. */
1.1.1.3 misho 8899: if (altbacktrack.cc != ccbegin)
1.1 misho 8900: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8901:
1.1.1.4 misho 8902: compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
1.1 misho 8903: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8904: return;
8905:
/* Reaching the end of the matching path means this alternative matched. */
8906: add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8907:
1.1.1.4 misho 8908: compile_backtrackingpath(common, altbacktrack.top);
1.1 misho 8909: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8910: return;
1.1.1.3 misho 8911: set_jumps(altbacktrack.topbacktracks, LABEL());
1.1 misho 8912:
8913: if (*cc != OP_ALT)
8914: break;
8915:
1.1.1.3 misho 8916: altbacktrack.cc = cc + 1 + LINK_SIZE;
1.1 misho 8917: cc += GET(cc, 1);
8918: }
1.1.1.3 misho 8919:
1.1.1.4 misho 8920: /* None of them matched. */
1.1 misho 8921: OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
8922: jump = JUMP(SLJIT_JUMP);
8923:
/* Exits collected in common->quit while compiling the body: restore STACK_TOP,
   revert the frame if there is one, and report failure (TMP3 = 0). The list is
   then restarted with a single jump that is bound at the common epilogue. */
1.1.1.4 misho 8924: if (common->quit != NULL)
8925: {
8926: set_jumps(common->quit, LABEL());
8927: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
8928: if (needs_frame)
8929: {
/* STACK_TOP is moved by the frame size around the revertframes call and moved back afterwards. */
8930: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
8931: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8932: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
8933: }
8934: OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
8935: common->quit = NULL;
8936: add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
8937: }
8938:
/* Accept path: same stack and frame restoration, but success is reported (TMP3 = 1). */
1.1 misho 8939: set_jumps(common->accept, LABEL());
1.1.1.4 misho 8940: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
8941: if (needs_frame)
1.1 misho 8942: {
1.1.1.4 misho 8943: OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
1.1 misho 8944: add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
1.1.1.4 misho 8945: OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
1.1 misho 8946: }
8947: OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
8948:
/* Common epilogue for the no-match, quit and accept paths. */
8949: JUMPHERE(jump);
1.1.1.4 misho 8950: if (common->quit != NULL)
8951: set_jumps(common->quit, LABEL());
/* Copy the saved private data back (FALSE selects the opposite direction of the
   TRUE call in the prologue) and release the stack area. */
8952: copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
8953: free_stack(common, private_data_size + framesize + alternativesize);
/* Restore the recursive head (and the control head when one is used) from the
   words above STACK_TOP, and move the result from TMP3 into TMP1. */
8954: if (needs_control_head)
8955: {
8956: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
8957: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
8958: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0);
8959: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
8960: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
8961: }
8962: else
8963: {
8964: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
8965: OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
8966: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0);
8967: }
/* The return address is read from the word at STACK_TOP. */
1.1 misho 8968: sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
8969: }
8970:
1.1.1.4 misho 8971: #undef COMPILE_BACKTRACKINGPATH
1.1 misho 8972: #undef CURRENT_AS
8973:
8974: void
1.1.1.3 misho 8975: PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
1.1 misho 8976: {
8977: struct sljit_compiler *compiler;
1.1.1.3 misho 8978: backtrack_common rootbacktrack;
1.1 misho 8979: compiler_common common_data;
8980: compiler_common *common = &common_data;
1.1.1.2 misho 8981: const pcre_uint8 *tables = re->tables;
1.1 misho 8982: pcre_study_data *study;
1.1.1.4 misho 8983: int private_data_size;
1.1.1.2 misho 8984: pcre_uchar *ccend;
1.1.1.3 misho 8985: executable_functions *functions;
1.1 misho 8986: void *executable_func;
8987: sljit_uw executable_size;
1.1.1.4 misho 8988: struct sljit_label *mainloop_label = NULL;
8989: struct sljit_label *continue_match_label;
8990: struct sljit_label *empty_match_found_label;
8991: struct sljit_label *empty_match_backtrack_label;
8992: struct sljit_label *reset_match_label;
1.1.1.3 misho 8993: struct sljit_jump *jump;
1.1.1.4 misho 8994: struct sljit_jump *minlength_check_failed = NULL;
1.1 misho 8995: struct sljit_jump *reqbyte_notfound = NULL;
8996: struct sljit_jump *empty_match;
1.1.1.4 misho 8997: struct sljit_label *quit_label;
1.1 misho 8998:
8999: SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9000: study = extra->study_data;
9001:
9002: if (!tables)
1.1.1.2 misho 9003: tables = PRIV(default_tables);
1.1 misho 9004:
1.1.1.3 misho 9005: memset(&rootbacktrack, 0, sizeof(backtrack_common));
9006: memset(common, 0, sizeof(compiler_common));
9007: rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
1.1 misho 9008:
1.1.1.3 misho 9009: common->start = rootbacktrack.cc;
1.1 misho 9010: common->fcc = tables + fcc_offset;
1.1.1.4 misho 9011: common->lcc = (sljit_sw)(tables + lcc_offset);
1.1.1.3 misho 9012: common->mode = mode;
1.1 misho 9013: common->nltype = NLTYPE_FIXED;
9014: switch(re->options & PCRE_NEWLINE_BITS)
9015: {
9016: case 0:
9017: /* Compile-time default */
1.1.1.4 misho 9018: switch(NEWLINE)
1.1 misho 9019: {
9020: case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9021: case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9022: default: common->newline = NEWLINE; break;
9023: }
9024: break;
9025: case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9026: case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9027: case PCRE_NEWLINE_CR+
9028: PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9029: case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9030: case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9031: default: return;
9032: }
9033: if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9034: common->bsr_nltype = NLTYPE_ANYCRLF;
9035: else if ((re->options & PCRE_BSR_UNICODE) != 0)
9036: common->bsr_nltype = NLTYPE_ANY;
9037: else
9038: {
9039: #ifdef BSR_ANYCRLF
9040: common->bsr_nltype = NLTYPE_ANYCRLF;
9041: #else
9042: common->bsr_nltype = NLTYPE_ANY;
9043: #endif
9044: }
9045: common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
1.1.1.4 misho 9046: common->ctypes = (sljit_sw)(tables + ctypes_offset);
9047: common->digits[0] = -2;
1.1.1.5 ! misho 9048: common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
1.1 misho 9049: common->name_count = re->name_count;
9050: common->name_entry_size = re->name_entry_size;
9051: common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
1.1.1.2 misho 9052: #ifdef SUPPORT_UTF
1.1.1.4 misho 9053: /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
1.1.1.2 misho 9054: common->utf = (re->options & PCRE_UTF8) != 0;
1.1 misho 9055: #ifdef SUPPORT_UCP
1.1.1.2 misho 9056: common->use_ucp = (re->options & PCRE_UCP) != 0;
1.1 misho 9057: #endif
1.1.1.2 misho 9058: #endif /* SUPPORT_UTF */
1.1.1.3 misho 9059: ccend = bracketend(rootbacktrack.cc);
9060:
9061: /* Calculate the local space size on the stack. */
1.1.1.4 misho 9062: common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9063: common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
9064: if (!common->optimized_cbracket)
9065: return;
9066: #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9067: memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9068: #else
9069: memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9070: #endif
1.1.1.3 misho 9071:
9072: SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
1.1.1.4 misho 9073: #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9074: common->capture_last_ptr = common->ovector_start;
9075: common->ovector_start += sizeof(sljit_sw);
9076: #endif
9077: if (!check_opcode_types(common, rootbacktrack.cc, ccend))
9078: {
9079: SLJIT_FREE(common->optimized_cbracket);
1.1 misho 9080: return;
1.1.1.4 misho 9081: }
1.1.1.3 misho 9082:
9083: /* Checking flags and updating ovector_start. */
9084: if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9085: {
9086: common->req_char_ptr = common->ovector_start;
1.1.1.4 misho 9087: common->ovector_start += sizeof(sljit_sw);
1.1.1.3 misho 9088: }
9089: if (mode != JIT_COMPILE)
9090: {
9091: common->start_used_ptr = common->ovector_start;
1.1.1.4 misho 9092: common->ovector_start += sizeof(sljit_sw);
1.1.1.3 misho 9093: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9094: {
9095: common->hit_start = common->ovector_start;
1.1.1.4 misho 9096: common->ovector_start += 2 * sizeof(sljit_sw);
9097: }
9098: else
9099: {
9100: SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9101: common->needs_start_ptr = TRUE;
1.1.1.3 misho 9102: }
9103: }
9104: if ((re->options & PCRE_FIRSTLINE) != 0)
9105: {
9106: common->first_line_end = common->ovector_start;
1.1.1.4 misho 9107: common->ovector_start += sizeof(sljit_sw);
9108: }
9109: #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9110: common->control_head_ptr = 1;
9111: #endif
9112: if (common->control_head_ptr != 0)
9113: {
9114: common->control_head_ptr = common->ovector_start;
9115: common->ovector_start += sizeof(sljit_sw);
9116: }
9117: if (common->needs_start_ptr && common->has_set_som)
9118: {
9119: /* Saving the real start pointer is necessary. */
9120: common->start_ptr = common->ovector_start;
9121: common->ovector_start += sizeof(sljit_sw);
1.1.1.3 misho 9122: }
1.1.1.4 misho 9123: else
9124: common->needs_start_ptr = FALSE;
1.1.1.3 misho 9125:
9126: /* Aligning ovector to even number of sljit words. */
1.1.1.4 misho 9127: if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9128: common->ovector_start += sizeof(sljit_sw);
9129:
9130: if (common->start_ptr == 0)
9131: common->start_ptr = OVECTOR(0);
9132:
9133: /* Capturing brackets cannot be optimized if callouts are allowed. */
9134: if (common->capture_last_ptr != 0)
9135: memset(common->optimized_cbracket, 0, re->top_bracket + 1);
1.1.1.3 misho 9136:
9137: SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
1.1.1.4 misho 9138: common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9139:
9140: common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si));
9141: if (!common->private_data_ptrs)
9142: {
9143: SLJIT_FREE(common->optimized_cbracket);
1.1 misho 9144: return;
1.1.1.4 misho 9145: }
9146: memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));
9147:
9148: private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9149: set_private_data_ptrs(common, &private_data_size, ccend);
9150: if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9151: {
9152: SLJIT_FREE(common->private_data_ptrs);
9153: SLJIT_FREE(common->optimized_cbracket);
1.1 misho 9154: return;
1.1.1.4 misho 9155: }
9156:
9157: if (common->has_then)
9158: {
9159: common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc);
9160: if (!common->then_offsets)
9161: {
9162: SLJIT_FREE(common->optimized_cbracket);
9163: SLJIT_FREE(common->private_data_ptrs);
9164: return;
9165: }
9166: memset(common->then_offsets, 0, ccend - rootbacktrack.cc);
9167: set_then_offsets(common, rootbacktrack.cc, NULL);
9168: }
1.1 misho 9169:
9170: compiler = sljit_create_compiler();
9171: if (!compiler)
9172: {
1.1.1.4 misho 9173: SLJIT_FREE(common->optimized_cbracket);
9174: SLJIT_FREE(common->private_data_ptrs);
9175: if (common->has_then)
9176: SLJIT_FREE(common->then_offsets);
1.1 misho 9177: return;
9178: }
9179: common->compiler = compiler;
9180:
9181: /* Main pcre_jit_exec entry. */
1.1.1.4 misho 9182: sljit_emit_enter(compiler, 1, 5, 5, private_data_size);
1.1 misho 9183:
9184: /* Register init. */
9185: reset_ovector(common, (re->top_bracket + 1) * 2);
1.1.1.3 misho 9186: if (common->req_char_ptr != 0)
1.1.1.4 misho 9187: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0);
1.1 misho 9188:
1.1.1.2 misho 9189: OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
9190: OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
1.1 misho 9191: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9192: OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9193: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
1.1.1.4 misho 9194: OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
1.1 misho 9195: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9196: OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
1.1.1.4 misho 9197: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
1.1 misho 9198:
1.1.1.3 misho 9199: if (mode == JIT_PARTIAL_SOFT_COMPILE)
1.1.1.4 misho 9200: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
9201: if (common->mark_ptr != 0)
9202: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
9203: if (common->control_head_ptr != 0)
9204: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
1.1.1.3 misho 9205:
1.1 misho 9206: /* Main part of the matching */
9207: if ((re->options & PCRE_ANCHORED) == 0)
9208: {
1.1.1.4 misho 9209: mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
9210: continue_match_label = LABEL();
1.1 misho 9211: /* Forward search if possible. */
1.1.1.3 misho 9212: if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
9213: {
1.1.1.4 misho 9214: if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
9215: { /* Do nothing */ }
9216: else if ((re->flags & PCRE_FIRSTSET) != 0)
1.1.1.3 misho 9217: fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
9218: else if ((re->flags & PCRE_STARTLINE) != 0)
9219: fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
9220: else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
9221: fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
9222: }
1.1 misho 9223: }
1.1.1.4 misho 9224: else
9225: continue_match_label = LABEL();
9226:
9227: if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9228: {
9229: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
9230: OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
9231: minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0);
9232: }
1.1.1.3 misho 9233: if (common->req_char_ptr != 0)
9234: reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
1.1 misho 9235:
9236: /* Store the current STR_PTR in OVECTOR(0). */
9237: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
9238: /* Copy the limit of allowed recursions. */
1.1.1.4 misho 9239: OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
9240: if (common->capture_last_ptr != 0)
9241: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
9242:
9243: if (common->needs_start_ptr)
9244: {
9245: SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
9246: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0);
9247: }
9248: else
9249: SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
9250:
1.1.1.3 misho 9251: /* Copy the beginning of the string. */
9252: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9253: {
1.1.1.4 misho 9254: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1.1.1.3 misho 9255: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1.1.1.4 misho 9256: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
1.1.1.3 misho 9257: JUMPHERE(jump);
9258: }
9259: else if (mode == JIT_PARTIAL_HARD_COMPILE)
9260: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1.1 misho 9261:
1.1.1.4 misho 9262: compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack);
1.1 misho 9263: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9264: {
9265: sljit_free_compiler(compiler);
1.1.1.4 misho 9266: SLJIT_FREE(common->optimized_cbracket);
9267: SLJIT_FREE(common->private_data_ptrs);
9268: if (common->has_then)
9269: SLJIT_FREE(common->then_offsets);
1.1 misho 9270: return;
9271: }
9272:
9273: empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1.1.1.4 misho 9274: empty_match_found_label = LABEL();
1.1 misho 9275:
1.1.1.4 misho 9276: common->accept_label = LABEL();
1.1 misho 9277: if (common->accept != NULL)
1.1.1.4 misho 9278: set_jumps(common->accept, common->accept_label);
1.1 misho 9279:
9280: /* This means we have a match. Update the ovector. */
9281: copy_ovector(common, re->top_bracket + 1);
1.1.1.4 misho 9282: common->quit_label = common->forced_quit_label = LABEL();
9283: if (common->quit != NULL)
9284: set_jumps(common->quit, common->quit_label);
9285: if (common->forced_quit != NULL)
9286: set_jumps(common->forced_quit, common->forced_quit_label);
9287: if (minlength_check_failed != NULL)
9288: SET_LABEL(minlength_check_failed, common->forced_quit_label);
1.1.1.2 misho 9289: sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
1.1 misho 9290:
1.1.1.3 misho 9291: if (mode != JIT_COMPILE)
9292: {
9293: common->partialmatchlabel = LABEL();
9294: set_jumps(common->partialmatch, common->partialmatchlabel);
1.1.1.4 misho 9295: return_with_partial_match(common, common->quit_label);
1.1.1.3 misho 9296: }
9297:
1.1.1.4 misho 9298: empty_match_backtrack_label = LABEL();
9299: compile_backtrackingpath(common, rootbacktrack.top);
1.1 misho 9300: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9301: {
9302: sljit_free_compiler(compiler);
1.1.1.4 misho 9303: SLJIT_FREE(common->optimized_cbracket);
9304: SLJIT_FREE(common->private_data_ptrs);
9305: if (common->has_then)
9306: SLJIT_FREE(common->then_offsets);
1.1 misho 9307: return;
9308: }
9309:
1.1.1.3 misho 9310: SLJIT_ASSERT(rootbacktrack.prev == NULL);
1.1.1.4 misho 9311: reset_match_label = LABEL();
1.1.1.3 misho 9312:
9313: if (mode == JIT_PARTIAL_SOFT_COMPILE)
9314: {
9315: /* Update hit_start only in the first time. */
1.1.1.4 misho 9316: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
1.1.1.3 misho 9317: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
9318: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
9319: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
9320: JUMPHERE(jump);
9321: }
1.1 misho 9322:
9323: /* Check we have remaining characters. */
1.1.1.4 misho 9324: if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
9325: {
9326: SLJIT_ASSERT(common->first_line_end != 0);
9327: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
9328: }
9329:
9330: OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
1.1 misho 9331:
9332: if ((re->options & PCRE_ANCHORED) == 0)
9333: {
9334: if ((re->options & PCRE_FIRSTLINE) == 0)
1.1.1.4 misho 9335: CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
1.1 misho 9336: else
1.1.1.4 misho 9337: CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
1.1 misho 9338: }
9339:
1.1.1.3 misho 9340: /* No more remaining characters. */
1.1 misho 9341: if (reqbyte_notfound != NULL)
9342: JUMPHERE(reqbyte_notfound);
1.1.1.3 misho 9343:
9344: if (mode == JIT_PARTIAL_SOFT_COMPILE)
1.1.1.4 misho 9345: CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
1.1.1.3 misho 9346:
1.1 misho 9347: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
1.1.1.4 misho 9348: JUMPTO(SLJIT_JUMP, common->quit_label);
1.1 misho 9349:
9350: flush_stubs(common);
9351:
9352: JUMPHERE(empty_match);
9353: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9354: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
1.1.1.4 misho 9355: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
1.1 misho 9356: OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
1.1.1.4 misho 9357: CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
1.1 misho 9358: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1.1.1.4 misho 9359: CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
9360: JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
1.1 misho 9361:
9362: common->currententry = common->entries;
1.1.1.4 misho 9363: common->local_exit = TRUE;
9364: quit_label = common->quit_label;
1.1 misho 9365: while (common->currententry != NULL)
9366: {
9367: /* Might add new entries. */
9368: compile_recurse(common);
9369: if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9370: {
9371: sljit_free_compiler(compiler);
1.1.1.4 misho 9372: SLJIT_FREE(common->optimized_cbracket);
9373: SLJIT_FREE(common->private_data_ptrs);
9374: if (common->has_then)
9375: SLJIT_FREE(common->then_offsets);
1.1 misho 9376: return;
9377: }
9378: flush_stubs(common);
9379: common->currententry = common->currententry->next;
9380: }
1.1.1.4 misho 9381: common->local_exit = FALSE;
9382: common->quit_label = quit_label;
1.1 misho 9383:
9384: /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
9385: /* This is a (really) rare case. */
9386: set_jumps(common->stackalloc, LABEL());
9387: /* RETURN_ADDR is not a saved register. */
1.1.1.3 misho 9388: sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1.1 misho 9389: OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
9390: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9391: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9392: OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
9393: OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
9394:
9395: sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
1.1.1.3 misho 9396: jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
1.1 misho 9397: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9398: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9399: OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
9400: OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
9401: OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
9402: sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
9403:
9404: /* Allocation failed. */
1.1.1.3 misho 9405: JUMPHERE(jump);
1.1 misho 9406: /* We break the return address cache here, but this is a really rare case. */
9407: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
1.1.1.4 misho 9408: JUMPTO(SLJIT_JUMP, common->quit_label);
1.1 misho 9409:
9410: /* Call limit reached. */
9411: set_jumps(common->calllimit, LABEL());
9412: OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
1.1.1.4 misho 9413: JUMPTO(SLJIT_JUMP, common->quit_label);
1.1 misho 9414:
9415: if (common->revertframes != NULL)
9416: {
9417: set_jumps(common->revertframes, LABEL());
9418: do_revertframes(common);
9419: }
9420: if (common->wordboundary != NULL)
9421: {
9422: set_jumps(common->wordboundary, LABEL());
9423: check_wordboundary(common);
9424: }
9425: if (common->anynewline != NULL)
9426: {
9427: set_jumps(common->anynewline, LABEL());
9428: check_anynewline(common);
9429: }
9430: if (common->hspace != NULL)
9431: {
9432: set_jumps(common->hspace, LABEL());
9433: check_hspace(common);
9434: }
9435: if (common->vspace != NULL)
9436: {
9437: set_jumps(common->vspace, LABEL());
9438: check_vspace(common);
9439: }
9440: if (common->casefulcmp != NULL)
9441: {
9442: set_jumps(common->casefulcmp, LABEL());
9443: do_casefulcmp(common);
9444: }
9445: if (common->caselesscmp != NULL)
9446: {
9447: set_jumps(common->caselesscmp, LABEL());
9448: do_caselesscmp(common);
9449: }
1.1.1.4 misho 9450: if (common->reset_match != NULL)
9451: {
9452: set_jumps(common->reset_match, LABEL());
9453: do_reset_match(common, (re->top_bracket + 1) * 2);
9454: CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
9455: OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9456: JUMPTO(SLJIT_JUMP, reset_match_label);
9457: }
1.1.1.2 misho 9458: #ifdef SUPPORT_UTF
1.1.1.4 misho 9459: #ifndef COMPILE_PCRE32
1.1.1.2 misho 9460: if (common->utfreadchar != NULL)
1.1 misho 9461: {
1.1.1.2 misho 9462: set_jumps(common->utfreadchar, LABEL());
9463: do_utfreadchar(common);
1.1 misho 9464: }
1.1.1.4 misho 9465: #endif /* !COMPILE_PCRE32 */
1.1.1.2 misho 9466: #ifdef COMPILE_PCRE8
9467: if (common->utfreadtype8 != NULL)
1.1 misho 9468: {
1.1.1.2 misho 9469: set_jumps(common->utfreadtype8, LABEL());
9470: do_utfreadtype8(common);
1.1 misho 9471: }
1.1.1.2 misho 9472: #endif /* COMPILE_PCRE8 */
1.1.1.4 misho 9473: #endif /* SUPPORT_UTF */
1.1 misho 9474: #ifdef SUPPORT_UCP
9475: if (common->getucd != NULL)
9476: {
9477: set_jumps(common->getucd, LABEL());
9478: do_getucd(common);
9479: }
9480: #endif
9481:
1.1.1.4 misho 9482: SLJIT_FREE(common->optimized_cbracket);
9483: SLJIT_FREE(common->private_data_ptrs);
9484: if (common->has_then)
9485: SLJIT_FREE(common->then_offsets);
9486:
1.1 misho 9487: executable_func = sljit_generate_code(compiler);
9488: executable_size = sljit_get_generated_code_size(compiler);
9489: sljit_free_compiler(compiler);
9490: if (executable_func == NULL)
9491: return;
9492:
1.1.1.3 misho 9493: /* Reuse the function descriptor if possible. */
9494: if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
9495: functions = (executable_functions *)extra->executable_jit;
9496: else
1.1 misho 9497: {
1.1.1.4 misho 9498: /* Note: If your memory-checker has flagged the allocation below as a
9499: * memory leak, it is probably because you either forgot to call
9500: * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
9501: * pcre16_extra) object, or you called said function after having
9502: * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
9503: * of the object. (The function will only free the JIT data if the
9504: * bit remains set, as the bit indicates that the pointer to the data
9505: * is valid.)
9506: */
1.1.1.3 misho 9507: functions = SLJIT_MALLOC(sizeof(executable_functions));
9508: if (functions == NULL)
9509: {
9510: /* This case is highly unlikely since we just recently
9511: freed a lot of memory. Although not impossible. */
9512: sljit_free_code(executable_func);
9513: return;
9514: }
9515: memset(functions, 0, sizeof(executable_functions));
1.1.1.4 misho 9516: functions->top_bracket = (re->top_bracket + 1) * 2;
9517: functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
1.1.1.3 misho 9518: extra->executable_jit = functions;
9519: extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
1.1 misho 9520: }
9521:
1.1.1.3 misho 9522: functions->executable_funcs[mode] = executable_func;
9523: functions->executable_sizes[mode] = executable_size;
1.1 misho 9524: }
9525:
1.1.1.3 misho 9526: static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
1.1 misho 9527: {
9528: union {
9529: void* executable_func;
9530: jit_function call_executable_func;
9531: } convert_executable_func;
1.1.1.4 misho 9532: pcre_uint8 local_space[MACHINE_STACK_SIZE];
1.1 misho 9533: struct sljit_stack local_stack;
9534:
1.1.1.4 misho 9535: local_stack.top = (sljit_sw)&local_space;
1.1 misho 9536: local_stack.base = local_stack.top;
1.1.1.4 misho 9537: local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
1.1 misho 9538: local_stack.max_limit = local_stack.limit;
9539: arguments->stack = &local_stack;
1.1.1.3 misho 9540: convert_executable_func.executable_func = executable_func;
1.1 misho 9541: return convert_executable_func.call_executable_func(arguments);
9542: }
9543:
9544: int
1.1.1.4 misho 9545: PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
9546: int length, int start_offset, int options, int *offsets, int offset_count)
1.1 misho 9547: {
1.1.1.3 misho 9548: executable_functions *functions = (executable_functions *)extra_data->executable_jit;
1.1 misho 9549: union {
9550: void* executable_func;
9551: jit_function call_executable_func;
9552: } convert_executable_func;
9553: jit_arguments arguments;
1.1.1.4 misho 9554: int max_offset_count;
1.1 misho 9555: int retval;
1.1.1.3 misho 9556: int mode = JIT_COMPILE;
9557:
9558: if ((options & PCRE_PARTIAL_HARD) != 0)
9559: mode = JIT_PARTIAL_HARD_COMPILE;
9560: else if ((options & PCRE_PARTIAL_SOFT) != 0)
9561: mode = JIT_PARTIAL_SOFT_COMPILE;
9562:
9563: if (functions->executable_funcs[mode] == NULL)
1.1.1.4 misho 9564: return PCRE_ERROR_JIT_BADOPTION;
1.1 misho 9565:
9566: /* Sanity checks should be handled by pcre_exec. */
9567: arguments.str = subject + start_offset;
9568: arguments.begin = subject;
9569: arguments.end = subject + length;
1.1.1.3 misho 9570: arguments.mark_ptr = NULL;
9571: /* JIT decreases this value less frequently than the interpreter. */
1.1.1.4 misho 9572: arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
9573: if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
9574: arguments.limit_match = functions->limit_match;
1.1 misho 9575: arguments.notbol = (options & PCRE_NOTBOL) != 0;
9576: arguments.noteol = (options & PCRE_NOTEOL) != 0;
9577: arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
9578: arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
9579: arguments.offsets = offsets;
1.1.1.4 misho 9580: arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
9581: arguments.real_offset_count = offset_count;
1.1 misho 9582:
1.1.1.4 misho 9583: /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
1.1 misho 9584: the output vector for storing captured strings, with the remainder used as
9585: workspace. We don't need the workspace here. For compatibility, we limit the
9586: number of captured strings in the same way as pcre_exec(), so that the user
9587: gets the same result with and without JIT. */
9588:
1.1.1.4 misho 9589: if (offset_count != 2)
9590: offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
9591: max_offset_count = functions->top_bracket;
9592: if (offset_count > max_offset_count)
9593: offset_count = max_offset_count;
9594: arguments.offset_count = offset_count;
1.1 misho 9595:
1.1.1.3 misho 9596: if (functions->callback)
9597: arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
1.1 misho 9598: else
1.1.1.3 misho 9599: arguments.stack = (struct sljit_stack *)functions->userdata;
1.1 misho 9600:
9601: if (arguments.stack == NULL)
1.1.1.3 misho 9602: retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
1.1 misho 9603: else
9604: {
1.1.1.3 misho 9605: convert_executable_func.executable_func = functions->executable_funcs[mode];
1.1 misho 9606: retval = convert_executable_func.call_executable_func(&arguments);
9607: }
9608:
1.1.1.4 misho 9609: if (retval * 2 > offset_count)
9610: retval = 0;
9611: if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
9612: *(extra_data->mark) = arguments.mark_ptr;
9613:
9614: return retval;
9615: }
9616:
9617: #if defined COMPILE_PCRE8
9618: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
9619: pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
9620: PCRE_SPTR subject, int length, int start_offset, int options,
9621: int *offsets, int offset_count, pcre_jit_stack *stack)
9622: #elif defined COMPILE_PCRE16
9623: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
9624: pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
9625: PCRE_SPTR16 subject, int length, int start_offset, int options,
9626: int *offsets, int offset_count, pcre16_jit_stack *stack)
9627: #elif defined COMPILE_PCRE32
9628: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
9629: pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
9630: PCRE_SPTR32 subject, int length, int start_offset, int options,
9631: int *offsets, int offset_count, pcre32_jit_stack *stack)
9632: #endif
9633: {
9634: pcre_uchar *subject_ptr = (pcre_uchar *)subject;
9635: executable_functions *functions = (executable_functions *)extra_data->executable_jit;
9636: union {
9637: void* executable_func;
9638: jit_function call_executable_func;
9639: } convert_executable_func;
9640: jit_arguments arguments;
9641: int max_offset_count;
9642: int retval;
9643: int mode = JIT_COMPILE;
9644:
9645: SLJIT_UNUSED_ARG(argument_re);
9646:
9647: /* Plausibility checks */
9648: if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
9649:
9650: if ((options & PCRE_PARTIAL_HARD) != 0)
9651: mode = JIT_PARTIAL_HARD_COMPILE;
9652: else if ((options & PCRE_PARTIAL_SOFT) != 0)
9653: mode = JIT_PARTIAL_SOFT_COMPILE;
9654:
9655: if (functions->executable_funcs[mode] == NULL)
9656: return PCRE_ERROR_JIT_BADOPTION;
9657:
9658: /* Sanity checks should be handled by pcre_exec. */
9659: arguments.stack = (struct sljit_stack *)stack;
9660: arguments.str = subject_ptr + start_offset;
9661: arguments.begin = subject_ptr;
9662: arguments.end = subject_ptr + length;
9663: arguments.mark_ptr = NULL;
9664: /* JIT decreases this value less frequently than the interpreter. */
9665: arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
9666: if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
9667: arguments.limit_match = functions->limit_match;
9668: arguments.notbol = (options & PCRE_NOTBOL) != 0;
9669: arguments.noteol = (options & PCRE_NOTEOL) != 0;
9670: arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
9671: arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
9672: arguments.offsets = offsets;
9673: arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
9674: arguments.real_offset_count = offset_count;
9675:
9676: /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
9677: the output vector for storing captured strings, with the remainder used as
9678: workspace. We don't need the workspace here. For compatibility, we limit the
9679: number of captured strings in the same way as pcre_exec(), so that the user
9680: gets the same result with and without JIT. */
9681:
9682: if (offset_count != 2)
9683: offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
9684: max_offset_count = functions->top_bracket;
9685: if (offset_count > max_offset_count)
9686: offset_count = max_offset_count;
9687: arguments.offset_count = offset_count;
9688:
9689: convert_executable_func.executable_func = functions->executable_funcs[mode];
9690: retval = convert_executable_func.call_executable_func(&arguments);
9691:
9692: if (retval * 2 > offset_count)
1.1 misho 9693: retval = 0;
1.1.1.3 misho 9694: if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
9695: *(extra_data->mark) = arguments.mark_ptr;
9696:
1.1 misho 9697: return retval;
9698: }
9699:
9700: void
1.1.1.3 misho 9701: PRIV(jit_free)(void *executable_funcs)
1.1 misho 9702: {
1.1.1.3 misho 9703: int i;
9704: executable_functions *functions = (executable_functions *)executable_funcs;
9705: for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
9706: {
9707: if (functions->executable_funcs[i] != NULL)
9708: sljit_free_code(functions->executable_funcs[i]);
9709: }
9710: SLJIT_FREE(functions);
1.1 misho 9711: }
9712:
9713: int
1.1.1.3 misho 9714: PRIV(jit_get_size)(void *executable_funcs)
1.1 misho 9715: {
1.1.1.3 misho 9716: int i;
9717: sljit_uw size = 0;
9718: sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
9719: for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
9720: size += executable_sizes[i];
9721: return (int)size;
1.1 misho 9722: }
9723:
1.1.1.2 misho 9724: const char*
9725: PRIV(jit_get_target)(void)
9726: {
9727: return sljit_get_platform_name();
9728: }
9729:
1.1.1.4 misho 9730: #if defined COMPILE_PCRE8
1.1 misho 9731: PCRE_EXP_DECL pcre_jit_stack *
9732: pcre_jit_stack_alloc(int startsize, int maxsize)
1.1.1.4 misho 9733: #elif defined COMPILE_PCRE16
1.1.1.2 misho 9734: PCRE_EXP_DECL pcre16_jit_stack *
9735: pcre16_jit_stack_alloc(int startsize, int maxsize)
1.1.1.4 misho 9736: #elif defined COMPILE_PCRE32
9737: PCRE_EXP_DECL pcre32_jit_stack *
9738: pcre32_jit_stack_alloc(int startsize, int maxsize)
1.1.1.2 misho 9739: #endif
1.1 misho 9740: {
9741: if (startsize < 1 || maxsize < 1)
9742: return NULL;
9743: if (startsize > maxsize)
9744: startsize = maxsize;
9745: startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
9746: maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
1.1.1.2 misho 9747: return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
1.1 misho 9748: }
9749:
1.1.1.4 misho 9750: #if defined COMPILE_PCRE8
1.1 misho 9751: PCRE_EXP_DECL void
9752: pcre_jit_stack_free(pcre_jit_stack *stack)
1.1.1.4 misho 9753: #elif defined COMPILE_PCRE16
1.1.1.2 misho 9754: PCRE_EXP_DECL void
9755: pcre16_jit_stack_free(pcre16_jit_stack *stack)
1.1.1.4 misho 9756: #elif defined COMPILE_PCRE32
9757: PCRE_EXP_DECL void
9758: pcre32_jit_stack_free(pcre32_jit_stack *stack)
1.1.1.2 misho 9759: #endif
1.1 misho 9760: {
1.1.1.3 misho 9761: sljit_free_stack((struct sljit_stack *)stack);
1.1 misho 9762: }
9763:
1.1.1.4 misho 9764: #if defined COMPILE_PCRE8
1.1 misho 9765: PCRE_EXP_DECL void
9766: pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
1.1.1.4 misho 9767: #elif defined COMPILE_PCRE16
1.1.1.2 misho 9768: PCRE_EXP_DECL void
9769: pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
1.1.1.4 misho 9770: #elif defined COMPILE_PCRE32
9771: PCRE_EXP_DECL void
9772: pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
1.1.1.2 misho 9773: #endif
1.1 misho 9774: {
1.1.1.3 misho 9775: executable_functions *functions;
1.1 misho 9776: if (extra != NULL &&
9777: (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
9778: extra->executable_jit != NULL)
9779: {
1.1.1.3 misho 9780: functions = (executable_functions *)extra->executable_jit;
9781: functions->callback = callback;
9782: functions->userdata = userdata;
1.1 misho 9783: }
9784: }
9785:
1.1.1.5 ! misho 9786: #if defined COMPILE_PCRE8
! 9787: PCRE_EXP_DECL void
! 9788: pcre_jit_free_unused_memory(void)
! 9789: #elif defined COMPILE_PCRE16
! 9790: PCRE_EXP_DECL void
! 9791: pcre16_jit_free_unused_memory(void)
! 9792: #elif defined COMPILE_PCRE32
! 9793: PCRE_EXP_DECL void
! 9794: pcre32_jit_free_unused_memory(void)
! 9795: #endif
! 9796: {
! 9797: sljit_free_unused_memory_exec();
! 9798: }
! 9799:
1.1 misho 9800: #else /* SUPPORT_JIT */
9801:
9802: /* These are dummy functions to avoid linking errors when JIT support is not
9803: being compiled. */
9804:
1.1.1.4 misho 9805: #if defined COMPILE_PCRE8
1.1 misho 9806: PCRE_EXP_DECL pcre_jit_stack *
9807: pcre_jit_stack_alloc(int startsize, int maxsize)
1.1.1.4 misho 9808: #elif defined COMPILE_PCRE16
1.1.1.2 misho 9809: PCRE_EXP_DECL pcre16_jit_stack *
9810: pcre16_jit_stack_alloc(int startsize, int maxsize)
1.1.1.4 misho 9811: #elif defined COMPILE_PCRE32
9812: PCRE_EXP_DECL pcre32_jit_stack *
9813: pcre32_jit_stack_alloc(int startsize, int maxsize)
1.1.1.2 misho 9814: #endif
1.1 misho 9815: {
9816: (void)startsize;
9817: (void)maxsize;
9818: return NULL;
9819: }
9820:
1.1.1.4 misho 9821: #if defined COMPILE_PCRE8
1.1 misho 9822: PCRE_EXP_DECL void
9823: pcre_jit_stack_free(pcre_jit_stack *stack)
1.1.1.4 misho 9824: #elif defined COMPILE_PCRE16
1.1.1.2 misho 9825: PCRE_EXP_DECL void
9826: pcre16_jit_stack_free(pcre16_jit_stack *stack)
1.1.1.4 misho 9827: #elif defined COMPILE_PCRE32
9828: PCRE_EXP_DECL void
9829: pcre32_jit_stack_free(pcre32_jit_stack *stack)
1.1.1.2 misho 9830: #endif
1.1 misho 9831: {
9832: (void)stack;
9833: }
9834:
1.1.1.4 misho 9835: #if defined COMPILE_PCRE8
1.1 misho 9836: PCRE_EXP_DECL void
9837: pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
1.1.1.4 misho 9838: #elif defined COMPILE_PCRE16
1.1.1.2 misho 9839: PCRE_EXP_DECL void
9840: pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
1.1.1.4 misho 9841: #elif defined COMPILE_PCRE32
9842: PCRE_EXP_DECL void
9843: pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
1.1.1.2 misho 9844: #endif
1.1 misho 9845: {
9846: (void)extra;
9847: (void)callback;
9848: (void)userdata;
9849: }
9850:
1.1.1.5 ! misho 9851: #if defined COMPILE_PCRE8
! 9852: PCRE_EXP_DECL void
! 9853: pcre_jit_free_unused_memory(void)
! 9854: #elif defined COMPILE_PCRE16
! 9855: PCRE_EXP_DECL void
! 9856: pcre16_jit_free_unused_memory(void)
! 9857: #elif defined COMPILE_PCRE32
! 9858: PCRE_EXP_DECL void
! 9859: pcre32_jit_free_unused_memory(void)
! 9860: #endif
! 9861: {
! 9862: }
! 9863:
1.1 misho 9864: #endif
9865:
9866: /* End of pcre_jit_compile.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>