--- embedaddon/pcre/pcre_jit_compile.c 2012/02/21 23:50:25 1.1.1.2 +++ embedaddon/pcre/pcre_jit_compile.c 2014/06/15 19:46:03 1.1.1.5 @@ -6,10 +6,10 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge The machine code generator part (this module) was written by Zoltan Herczeg - Copyright (c) 2010-2012 + Copyright (c) 2010-2013 ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "pcre_internal.h" -#ifdef SUPPORT_JIT +#if defined SUPPORT_JIT /* All-in-one: Since we use the JIT compiler only from here, we just include it. This way we don't need to touch the build @@ -65,9 +65,21 @@ system files. */ #error Unsupported architecture #endif -/* Allocate memory on the stack. Fast, but limited size. */ -#define LOCAL_SPACE_SIZE 32768 +/* Defines for debugging purposes. */ +/* 1 - Use unoptimized capturing brackets. + 2 - Enable capture_last_ptr (includes option 1). */ +/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ + +/* 1 - Always have a control head. */ +/* #define DEBUG_FORCE_CONTROL_HEAD 1 */ + +/* Allocate memory for the regex stack on the real machine stack. +Fast, but limited size. */ +#define MACHINE_STACK_SIZE 32768 + +/* Growth rate for stack allocated by the OS. Should be the multiply +of page size. */ #define STACK_GROWTH_RATE 8192 /* Enable to check that the allocation could destroy temporaries. */ @@ -82,23 +94,23 @@ The code generator follows the recursive nature of the expressions. The basic blocks of regular expressions are condition checkers whose execute different commands depending on the result of the condition check. The relationship between the operators can be horizontal (concatenation) and -vertical (sub-expression) (See struct fallback_common for more details). +vertical (sub-expression) (See struct backtrack_common for more details). 'ab' - 'a' and 'b' regexps are concatenated 'a+' - 'a' is the sub-expression of the '+' operator The condition checkers are boolean (true/false) checkers. Machine code is generated for the checker itself and for the actions depending on the result of the checker. -The 'true' case is called as the hot path (expected path), and the other is called as -the 'fallback' path. Branch instructions are expesive for all CPUs, so we avoid taken -branches on the hot path. +The 'true' case is called as the matching path (expected path), and the other is called as +the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken +branches on the matching path. Greedy star operator (*) : - Hot path: match happens. - Fallback path: match failed. + Matching path: match happens. + Backtrack path: match failed. Non-greedy star operator (*?) : - Hot path: no need to perform a match. - Fallback path: match is required. + Matching path: no need to perform a match. + Backtrack path: match is required. The following example shows how the code generated for a capturing bracket with two alternatives. Let A, B, C, D are arbirary regular expressions, and @@ -108,41 +120,41 @@ we have the following regular expression: The generated code will be the following: - A hot path - '(' hot path (pushing arguments to the stack) - B hot path - ')' hot path (pushing arguments to the stack) - D hot path + A matching path + '(' matching path (pushing arguments to the stack) + B matching path + ')' matching path (pushing arguments to the stack) + D matching path return with successful match - D fallback path - ')' fallback path (If we arrived from "C" jump to the fallback of "C") - B fallback path + D backtrack path + ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") + B backtrack path C expected path - jump to D hot path - C fallback path - A fallback path + jump to D matching path + C backtrack path + A backtrack path - Notice, that the order of fallback code paths are the opposite of the fast + Notice, that the order of backtrack code paths are the opposite of the fast code paths. In this way the topmost value on the stack is always belong - to the current fallback code path. The fallback code path must check + to the current backtrack code path. The backtrack path must check whether there is a next alternative. If so, it needs to jump back to - the hot path eventually. Otherwise it needs to clear out its own stack - frame and continue the execution on the fallback code paths. + the matching path eventually. Otherwise it needs to clear out its own stack + frame and continue the execution on the backtrack code paths. */ /* Saved stack frames: -Atomic blocks and asserts require reloading the values of local variables -when the fallback mechanism performed. Because of OP_RECURSE, the locals +Atomic blocks and asserts require reloading the values of private data +when the backtrack mechanism performed. Because of OP_RECURSE, the data are not necessarly known in compile time, thus we need a dynamic restore mechanism. The stack frames are stored in a chain list, and have the following format: ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] -Thus we can restore the locals to a particular point in the stack. +Thus we can restore the private data to a particular point in the stack. */ typedef struct jit_arguments { @@ -152,106 +164,117 @@ typedef struct jit_arguments { const pcre_uchar *begin; const pcre_uchar *end; int *offsets; - pcre_uchar *ptr; + pcre_uchar *uchar_ptr; + pcre_uchar *mark_ptr; + void *callout_data; /* Everything else after. */ - int offsetcount; - int calllimit; + pcre_uint32 limit_match; + int real_offset_count; + int offset_count; pcre_uint8 notbol; pcre_uint8 noteol; pcre_uint8 notempty; pcre_uint8 notempty_atstart; } jit_arguments; -typedef struct executable_function { - void *executable_func; +typedef struct executable_functions { + void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; PUBL(jit_callback) callback; void *userdata; - sljit_uw executable_size; -} executable_function; + pcre_uint32 top_bracket; + pcre_uint32 limit_match; + sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; +} executable_functions; typedef struct jump_list { struct sljit_jump *jump; struct jump_list *next; } jump_list; -enum stub_types { stack_alloc }; - typedef struct stub_list { - enum stub_types type; - int data; struct sljit_jump *start; - struct sljit_label *leave; + struct sljit_label *quit; struct stub_list *next; } stub_list; +enum frame_types { + no_frame = -1, + no_stack = -2 +}; + +enum control_types { + type_mark = 0, + type_then_trap = 1 +}; + typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); /* The following structure is the key data type for the recursive -code generator. It is allocated by compile_hotpath, and contains -the aguments for compile_fallbackpath. Must be the first member +code generator. It is allocated by compile_matchingpath, and contains +the arguments for compile_backtrackingpath. Must be the first member of its descendants. */ -typedef struct fallback_common { +typedef struct backtrack_common { /* Concatenation stack. */ - struct fallback_common *prev; - jump_list *nextfallbacks; + struct backtrack_common *prev; + jump_list *nextbacktracks; /* Internal stack (for component operators). */ - struct fallback_common *top; - jump_list *topfallbacks; + struct backtrack_common *top; + jump_list *topbacktracks; /* Opcode pointer. */ pcre_uchar *cc; -} fallback_common; +} backtrack_common; -typedef struct assert_fallback { - fallback_common common; +typedef struct assert_backtrack { + backtrack_common common; jump_list *condfailed; - /* Less than 0 (-1) if a frame is not needed. */ + /* Less than 0 if a frame is not needed. */ int framesize; /* Points to our private memory word on the stack. */ - int localptr; + int private_data_ptr; /* For iterators. */ - struct sljit_label *hotpath; -} assert_fallback; + struct sljit_label *matchingpath; +} assert_backtrack; -typedef struct bracket_fallback { - fallback_common common; +typedef struct bracket_backtrack { + backtrack_common common; /* Where to coninue if an alternative is successfully matched. */ - struct sljit_label *althotpath; + struct sljit_label *alternative_matchingpath; /* For rmin and rmax iterators. */ - struct sljit_label *recursivehotpath; + struct sljit_label *recursive_matchingpath; /* For greedy ? operator. */ - struct sljit_label *zerohotpath; + struct sljit_label *zero_matchingpath; /* Contains the branches of a failed condition. */ union { /* Both for OP_COND, OP_SCOND. */ jump_list *condfailed; - assert_fallback *assert; - /* For OP_ONCE. -1 if not needed. */ + assert_backtrack *assert; + /* For OP_ONCE. Less than 0 if not needed. */ int framesize; } u; /* Points to our private memory word on the stack. */ - int localptr; -} bracket_fallback; + int private_data_ptr; +} bracket_backtrack; -typedef struct bracketpos_fallback { - fallback_common common; +typedef struct bracketpos_backtrack { + backtrack_common common; /* Points to our private memory word on the stack. */ - int localptr; + int private_data_ptr; /* Reverting stack is needed. */ int framesize; /* Allocated stack size. */ int stacksize; -} bracketpos_fallback; +} bracketpos_backtrack; -typedef struct braminzero_fallback { - fallback_common common; - struct sljit_label *hotpath; -} braminzero_fallback; +typedef struct braminzero_backtrack { + backtrack_common common; + struct sljit_label *matchingpath; +} braminzero_backtrack; -typedef struct iterator_fallback { - fallback_common common; +typedef struct iterator_backtrack { + backtrack_common common; /* Next iteration. */ - struct sljit_label *hotpath; -} iterator_fallback; + struct sljit_label *matchingpath; +} iterator_backtrack; typedef struct recurse_entry { struct recurse_entry *next; @@ -260,33 +283,110 @@ typedef struct recurse_entry { /* Collects the calls until the function is not created. */ jump_list *calls; /* Points to the starting opcode. */ - int start; + sljit_sw start; } recurse_entry; -typedef struct recurse_fallback { - fallback_common common; -} recurse_fallback; +typedef struct recurse_backtrack { + backtrack_common common; + BOOL inlined_pattern; +} recurse_backtrack; +#define OP_THEN_TRAP OP_TABLE_LENGTH + +typedef struct then_trap_backtrack { + backtrack_common common; + /* If then_trap is not NULL, this structure contains the real + then_trap for the backtracking path. */ + struct then_trap_backtrack *then_trap; + /* Points to the starting opcode. */ + sljit_sw start; + /* Exit point for the then opcodes of this alternative. */ + jump_list *quit; + /* Frame size of the current alternative. */ + int framesize; +} then_trap_backtrack; + +#define MAX_RANGE_SIZE 6 + typedef struct compiler_common { + /* The sljit ceneric compiler. */ struct sljit_compiler *compiler; + /* First byte code. */ pcre_uchar *start; - int localsize; - int *localptrs; + /* Maps private data offset to each opcode. */ + sljit_si *private_data_ptrs; + /* Tells whether the capturing bracket is optimized. */ + pcre_uint8 *optimized_cbracket; + /* Tells whether the starting offset is a target of then. */ + pcre_uint8 *then_offsets; + /* Current position where a THEN must jump. */ + then_trap_backtrack *then_trap; + /* Starting offset of private data for capturing brackets. */ + int cbra_ptr; + /* Output vector starting point. Must be divisible by 2. */ + int ovector_start; + /* Last known position of the requested byte. */ + int req_char_ptr; + /* Head of the last recursion. */ + int recursive_head_ptr; + /* First inspected character for partial matching. */ + int start_used_ptr; + /* Starting pointer for partial soft matches. */ + int hit_start; + /* End pointer of the first line. */ + int first_line_end; + /* Points to the marked string. */ + int mark_ptr; + /* Recursive control verb management chain. */ + int control_head_ptr; + /* Points to the last matched capture block index. */ + int capture_last_ptr; + /* Points to the starting position of the current match. */ + int start_ptr; + + /* Flipped and lower case tables. */ const pcre_uint8 *fcc; - sljit_w lcc; - int cbraptr; + sljit_sw lcc; + /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ + int mode; + /* \K is found in the pattern. */ + BOOL has_set_som; + /* (*SKIP:arg) is found in the pattern. */ + BOOL has_skip_arg; + /* (*THEN) is found in the pattern. */ + BOOL has_then; + /* Needs to know the start position anytime. */ + BOOL needs_start_ptr; + /* Currently in recurse or negative assert. */ + BOOL local_exit; + /* Currently in a positive assert. */ + BOOL positive_assert; + /* Newline control. */ int nltype; int newline; int bsr_nltype; + /* Dollar endonly. */ int endonly; - sljit_w ctypes; - sljit_uw name_table; - sljit_w name_count; - sljit_w name_entry_size; - struct sljit_label *acceptlabel; + /* Tables. */ + sljit_sw ctypes; + int digits[2 + MAX_RANGE_SIZE]; + /* Named capturing brackets. */ + pcre_uchar *name_table; + sljit_sw name_count; + sljit_sw name_entry_size; + + /* Labels and jump lists. */ + struct sljit_label *partialmatchlabel; + struct sljit_label *quit_label; + struct sljit_label *forced_quit_label; + struct sljit_label *accept_label; stub_list *stubs; recurse_entry *entries; recurse_entry *currententry; + jump_list *partialmatch; + jump_list *quit; + jump_list *positive_assert_quit; + jump_list *forced_quit; jump_list *accept; jump_list *calllimit; jump_list *stackalloc; @@ -297,13 +397,16 @@ typedef struct compiler_common { jump_list *vspace; jump_list *casefulcmp; jump_list *caselesscmp; + jump_list *reset_match; BOOL jscript_compat; #ifdef SUPPORT_UTF BOOL utf; #ifdef SUPPORT_UCP BOOL use_ucp; #endif +#ifndef COMPILE_PCRE32 jump_list *utfreadchar; +#endif #ifdef COMPILE_PCRE8 jump_list *utfreadtype8; #endif @@ -321,89 +424,79 @@ typedef struct compare_context { #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED int ucharptr; union { - sljit_i asint; + sljit_si asint; sljit_uh asushort; -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 sljit_ub asbyte; sljit_ub asuchars[4]; -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 sljit_uh asuchars[2]; +#elif defined COMPILE_PCRE32 + sljit_ui asuchars[1]; #endif -#endif } c; union { - sljit_i asint; + sljit_si asint; sljit_uh asushort; -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 sljit_ub asbyte; sljit_ub asuchars[4]; -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 sljit_uh asuchars[2]; +#elif defined COMPILE_PCRE32 + sljit_ui asuchars[1]; #endif -#endif } oc; #endif } compare_context; -enum { - frame_end = 0, - frame_setstrbegin = -1 -}; - /* Undefine sljit macros. */ #undef CMP /* Used for accessing the elements of the stack. */ -#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w)) +#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw)) -#define TMP1 SLJIT_TEMPORARY_REG1 -#define TMP2 SLJIT_TEMPORARY_REG3 +#define TMP1 SLJIT_SCRATCH_REG1 +#define TMP2 SLJIT_SCRATCH_REG3 #define TMP3 SLJIT_TEMPORARY_EREG2 #define STR_PTR SLJIT_SAVED_REG1 #define STR_END SLJIT_SAVED_REG2 -#define STACK_TOP SLJIT_TEMPORARY_REG2 +#define STACK_TOP SLJIT_SCRATCH_REG2 #define STACK_LIMIT SLJIT_SAVED_REG3 #define ARGUMENTS SLJIT_SAVED_EREG1 -#define CALL_COUNT SLJIT_SAVED_EREG2 +#define COUNT_MATCH SLJIT_SAVED_EREG2 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1 -/* Locals layout. */ +/* Local space layout. */ /* These two locals can be used by the current opcode. */ -#define LOCALS0 (0 * sizeof(sljit_w)) -#define LOCALS1 (1 * sizeof(sljit_w)) +#define LOCALS0 (0 * sizeof(sljit_sw)) +#define LOCALS1 (1 * sizeof(sljit_sw)) /* Two local variables for possessive quantifiers (char1 cannot use them). */ -#define POSSESSIVE0 (2 * sizeof(sljit_w)) -#define POSSESSIVE1 (3 * sizeof(sljit_w)) -/* Head of the last recursion. */ -#define RECURSIVE_HEAD (4 * sizeof(sljit_w)) +#define POSSESSIVE0 (2 * sizeof(sljit_sw)) +#define POSSESSIVE1 (3 * sizeof(sljit_sw)) /* Max limit of recursions. */ -#define CALL_LIMIT (5 * sizeof(sljit_w)) -/* Last known position of the requested byte. */ -#define REQ_CHAR_PTR (6 * sizeof(sljit_w)) -/* End pointer of the first line. */ -#define FIRSTLINE_END (7 * sizeof(sljit_w)) +#define LIMIT_MATCH (4 * sizeof(sljit_sw)) /* The output vector is stored on the stack, and contains pointers to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is the start pointers when the end of the capturing group has not yet reached. */ -#define OVECTOR_START (8 * sizeof(sljit_w)) -#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w)) -#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w)) -#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start]) +#define OVECTOR_START (common->ovector_start) +#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) +#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 #define MOV_UCHAR SLJIT_MOV_UB #define MOVU_UCHAR SLJIT_MOVU_UB -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 #define MOV_UCHAR SLJIT_MOV_UH #define MOVU_UCHAR SLJIT_MOVU_UH +#elif defined COMPILE_PCRE32 +#define MOV_UCHAR SLJIT_MOV_UI +#define MOVU_UCHAR SLJIT_MOVU_UI #else #error Unsupported compiling mode #endif -#endif /* Shortcuts. */ #define DEFINE_COMPILER \ @@ -420,12 +513,16 @@ the start pointers when the end of the capturing group sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) #define JUMPHERE(jump) \ sljit_set_label((jump), sljit_emit_label(compiler)) +#define SET_LABEL(jump, label) \ + sljit_set_label((jump), (label)) #define CMP(type, src1, src1w, src2, src2w) \ sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) #define CMPTO(type, src1, src1w, src2, src2w, label) \ sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) -#define COND_VALUE(op, dst, dstw, type) \ - sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type)) +#define OP_FLAGS(op, dst, dstw, src, srcw, type) \ + sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type)) +#define GET_LOCAL_BASE(dst, dstw, offset) \ + sljit_get_local_base(compiler, (dst), (dstw), (offset)) static pcre_uchar* bracketend(pcre_uchar* cc) { @@ -438,14 +535,14 @@ return cc; /* Functions whose might need modification for all new supported opcodes: next_opcode - get_localspace - set_localptrs + check_opcode_types + set_private_data_ptrs get_framesize init_frame - get_localsize - copy_locals - compile_hotpath - compile_fallbackpath + get_private_data_copy_length + copy_private_data + compile_matchingpath + compile_backtrackingpath */ static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) @@ -466,6 +563,8 @@ switch(*cc) case OP_WORDCHAR: case OP_ANY: case OP_ALLANY: + case OP_NOTPROP: + case OP_PROP: case OP_ANYNL: case OP_NOT_HSPACE: case OP_HSPACE: @@ -478,37 +577,67 @@ switch(*cc) case OP_CIRCM: case OP_DOLL: case OP_DOLLM: - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - case OP_TYPEPOSQUERY: case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRPLUS: case OP_CRMINPLUS: case OP_CRQUERY: case OP_CRMINQUERY: + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + case OP_CRPOSRANGE: + case OP_CLASS: + case OP_NCLASS: + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + case OP_RECURSE: + case OP_CALLOUT: + case OP_ALT: + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_REVERSE: + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ONCE: + case OP_ONCE_NC: + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_CBRAPOS: + case OP_COND: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + case OP_SCOND: + case OP_CREF: + case OP_DNCREF: + case OP_RREF: + case OP_DNRREF: case OP_DEF: case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO: + case OP_PRUNE: + case OP_SKIP: + case OP_THEN: + case OP_COMMIT: case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: + case OP_CLOSE: case OP_SKIPZERO: - return cc + 1; + return cc + PRIV(OP_lengths)[*cc]; - case OP_ANYBYTE: -#ifdef SUPPORT_UTF - if (common->utf) return NULL; -#endif - return cc + 1; - case OP_CHAR: case OP_CHARI: case OP_NOT: @@ -519,182 +648,415 @@ switch(*cc) case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: + case OP_POSUPTO: case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: + case OP_POSUPTOI: case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI: - case OP_NOTPOSSTARI: - case OP_NOTPOSPLUSI: - case OP_NOTPOSQUERYI: - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - return cc; - - case OP_UPTO: - case OP_MINUPTO: - case OP_EXACT: - case OP_POSUPTO: - case OP_UPTOI: - case OP_MINUPTOI: - case OP_EXACTI: - case OP_POSUPTOI: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - case OP_NOTEXACT: - case OP_NOTPOSUPTO: case OP_NOTUPTOI: case OP_NOTMINUPTOI: case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI: - cc += 2 + IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; #ifdef SUPPORT_UTF if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif return cc; - case OP_NOTPROP: - case OP_PROP: - return cc + 1 + 2; - + /* Special cases. */ + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: - case OP_REF: - case OP_REFI: - case OP_CREF: - case OP_NCREF: - case OP_RREF: - case OP_NRREF: - case OP_CLOSE: - cc += 1 + IMM2_SIZE; - return cc; + return cc + PRIV(OP_lengths)[*cc] - 1; - case OP_CRRANGE: - case OP_CRMINRANGE: - return cc + 1 + 2 * IMM2_SIZE; + case OP_ANYBYTE: +#ifdef SUPPORT_UTF + if (common->utf) return NULL; +#endif + return cc + 1; - case OP_CLASS: - case OP_NCLASS: - return cc + 1 + 32 / sizeof(pcre_uchar); - #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: return cc + GET(cc, 1); #endif - case OP_RECURSE: - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_REVERSE: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRA: - case OP_BRAPOS: - case OP_COND: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: - case OP_ALT: - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_KETRPOS: - return cc + 1 + LINK_SIZE; + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + return cc + 1 + 2 + cc[1]; - case OP_CBRA: - case OP_CBRAPOS: - case OP_SCBRA: - case OP_SCBRAPOS: - return cc + 1 + LINK_SIZE + IMM2_SIZE; - default: + /* All opcodes are supported now! */ + SLJIT_ASSERT_STOP(); return NULL; } } -static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) +static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) { -int localspace = 0; -pcre_uchar *alternative; +int count; +pcre_uchar *slot; + /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) { switch(*cc) { - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - case OP_ONCE_NC: - case OP_BRAPOS: - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: - localspace += sizeof(sljit_w); - cc += 1 + LINK_SIZE; + case OP_SET_SOM: + common->has_set_som = TRUE; + cc += 1; break; + case OP_REF: + case OP_REFI: + common->optimized_cbracket[GET2(cc, 1)] = 0; + cc += 1 + IMM2_SIZE; + break; + case OP_CBRAPOS: case OP_SCBRAPOS: - localspace += sizeof(sljit_w); + common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: - /* Might be a hidden SCOND. */ - alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) - localspace += sizeof(sljit_w); + case OP_SCOND: + /* Only AUTO_CALLOUT can insert this opcode. We do + not intend to support this case. */ + if (cc[1 + LINK_SIZE] == OP_CALLOUT) + return FALSE; cc += 1 + LINK_SIZE; break; + case OP_CREF: + common->optimized_cbracket[GET2(cc, 1)] = 0; + cc += 1 + IMM2_SIZE; + break; + + case OP_DNREF: + case OP_DNREFI: + case OP_DNCREF: + count = GET2(cc, 1 + IMM2_SIZE); + slot = common->name_table + GET2(cc, 1) * common->name_entry_size; + while (count-- > 0) + { + common->optimized_cbracket[GET2(slot, 0)] = 0; + slot += common->name_entry_size; + } + cc += 1 + 2 * IMM2_SIZE; + break; + + case OP_RECURSE: + /* Set its value only once. */ + if (common->recursive_head_ptr == 0) + { + common->recursive_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 1 + LINK_SIZE; + break; + + case OP_CALLOUT: + if (common->capture_last_ptr == 0) + { + common->capture_last_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 2 + 2 * LINK_SIZE; + break; + + case OP_THEN_ARG: + common->has_then = TRUE; + common->control_head_ptr = 1; + /* Fall through. */ + + case OP_PRUNE_ARG: + common->needs_start_ptr = TRUE; + /* Fall through. */ + + case OP_MARK: + if (common->mark_ptr == 0) + { + common->mark_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + common->has_then = TRUE; + common->control_head_ptr = 1; + /* Fall through. */ + + case OP_PRUNE: + case OP_SKIP: + common->needs_start_ptr = TRUE; + cc += 1; + break; + + case OP_SKIP_ARG: + common->control_head_ptr = 1; + common->has_skip_arg = TRUE; + cc += 1 + 2 + cc[1]; + break; + default: cc = next_opcode(common, cc); if (cc == NULL) - return -1; + return FALSE; break; } } -return localspace; +return TRUE; } -static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend) +static int get_class_iterator_size(pcre_uchar *cc) { +switch(*cc) + { + case OP_CRSTAR: + case OP_CRPLUS: + return 2; + + case OP_CRMINSTAR: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + return 1; + + case OP_CRRANGE: + case OP_CRMINRANGE: + if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) + return 0; + return 2; + + default: + return 0; + } +} + +static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin) +{ +pcre_uchar *end = bracketend(begin); +pcre_uchar *next; +pcre_uchar *next_end; +pcre_uchar *max_end; +pcre_uchar type; +sljit_sw length = end - begin; +int min, max, i; + +/* Detect fixed iterations first. */ +if (end[-(1 + LINK_SIZE)] != OP_KET) + return FALSE; + +/* Already detected repeat. */ +if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) + return TRUE; + +next = end; +min = 1; +while (1) + { + if (*next != *begin) + break; + next_end = bracketend(next); + if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0) + break; + next = next_end; + min++; + } + +if (min == 2) + return FALSE; + +max = 0; +max_end = next; +if (*next == OP_BRAZERO || *next == OP_BRAMINZERO) + { + type = *next; + while (1) + { + if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin) + break; + next_end = bracketend(next + 2 + LINK_SIZE); + if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) + break; + next = next_end; + max++; + } + + if (next[0] == type && next[1] == *begin && max >= 1) + { + next_end = bracketend(next + 1); + if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0) + { + for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) + if (*next_end != OP_KET) + break; + + if (i == max) + { + common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end; + common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO; + /* +2 the original and the last. */ + common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2; + if (min == 1) + return TRUE; + min--; + max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); + } + } + } + } + +if (min >= 3) + { + common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end; + common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT; + common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min; + return TRUE; + } + +return FALSE; +} + +#define CASE_ITERATOR_PRIVATE_DATA_1 \ + case OP_MINSTAR: \ + case OP_MINPLUS: \ + case OP_QUERY: \ + case OP_MINQUERY: \ + case OP_MINSTARI: \ + case OP_MINPLUSI: \ + case OP_QUERYI: \ + case OP_MINQUERYI: \ + case OP_NOTMINSTAR: \ + case OP_NOTMINPLUS: \ + case OP_NOTQUERY: \ + case OP_NOTMINQUERY: \ + case OP_NOTMINSTARI: \ + case OP_NOTMINPLUSI: \ + case OP_NOTQUERYI: \ + case OP_NOTMINQUERYI: + +#define CASE_ITERATOR_PRIVATE_DATA_2A \ + case OP_STAR: \ + case OP_PLUS: \ + case OP_STARI: \ + case OP_PLUSI: \ + case OP_NOTSTAR: \ + case OP_NOTPLUS: \ + case OP_NOTSTARI: \ + case OP_NOTPLUSI: + +#define CASE_ITERATOR_PRIVATE_DATA_2B \ + case OP_UPTO: \ + case OP_MINUPTO: \ + case OP_UPTOI: \ + case OP_MINUPTOI: \ + case OP_NOTUPTO: \ + case OP_NOTMINUPTO: \ + case OP_NOTUPTOI: \ + case OP_NOTMINUPTOI: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \ + case OP_TYPEMINSTAR: \ + case OP_TYPEMINPLUS: \ + case OP_TYPEQUERY: \ + case OP_TYPEMINQUERY: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \ + case OP_TYPESTAR: \ + case OP_TYPEPLUS: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \ + case OP_TYPEUPTO: \ + case OP_TYPEMINUPTO: + +static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend) +{ pcre_uchar *cc = common->start; pcre_uchar *alternative; +pcre_uchar *end = NULL; +int private_data_ptr = *private_data_start; +int space, size, bracketlen; + while (cc < ccend) { + space = 0; + size = 0; + bracketlen = 0; + if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) + return; + + if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND) + if (detect_repeat(common, cc)) + { + /* These brackets are converted to repeats, so no global + based single character repeat is allowed. */ + if (cc >= end) + end = bracketend(cc); + } + switch(*cc) { + case OP_KET: + if (common->private_data_ptrs[cc + 1 - common->start] != 0) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + cc += common->private_data_ptrs[cc + 1 - common->start]; + } + cc += 1 + LINK_SIZE; + break; + case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: @@ -705,16 +1067,16 @@ while (cc < ccend) case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - common->localptrs[cc - common->start] = localptr; - localptr += sizeof(sljit_w); - cc += 1 + LINK_SIZE; + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - common->localptrs[cc - common->start] = localptr; - localptr += sizeof(sljit_w); - cc += 1 + LINK_SIZE + IMM2_SIZE; + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -722,77 +1084,290 @@ while (cc < ccend) alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { - common->localptrs[cc - common->start] = localptr; - localptr += sizeof(sljit_w); + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); } - cc += 1 + LINK_SIZE; + bracketlen = 1 + LINK_SIZE; break; + case OP_BRA: + bracketlen = 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + bracketlen = 1 + LINK_SIZE + IMM2_SIZE; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + space = 1; + size = -2; + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + space = 2; + size = -2; + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + space = 2; + size = -(2 + IMM2_SIZE); + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + space = 1; + size = 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) + space = 2; + size = 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) + space = 2; + size = 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: + size += 1 + 32 / sizeof(pcre_uchar); + space = get_class_iterator_size(cc + size); + break; + +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + case OP_XCLASS: + size = GET(cc, 1); + space = get_class_iterator_size(cc + size); + break; +#endif + default: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); break; } + + /* Character iterators, which are not inside a repeated bracket, + gets a private slot instead of allocating it on the stack. */ + if (space > 0 && cc >= end) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw) * space; + } + + if (size != 0) + { + if (size < 0) + { + cc += -size; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + } + else + cc += size; + } + + if (bracketlen > 0) + { + if (cc >= end) + { + end = bracketend(cc); + if (end[-1 - LINK_SIZE] == OP_KET) + end = NULL; + } + cc += bracketlen; + } } +*private_data_start = private_data_ptr; } -/* Returns with -1 if no need for frame. */ -static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive) +/* Returns with a frame_types (always < 0) if no need for frame. */ +static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head) { -pcre_uchar *ccend = bracketend(cc); int length = 0; -BOOL possessive = FALSE; -BOOL setsom_found = FALSE; +int possessive = 0; +BOOL stack_restore = FALSE; +BOOL setsom_found = recursive; +BOOL setmark_found = recursive; +/* The last capture is a local variable even for recursions. */ +BOOL capture_last_found = FALSE; -if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +*needs_control_head = TRUE; +#else +*needs_control_head = FALSE; +#endif + +if (ccend == NULL) { - length = 3; - possessive = TRUE; + ccend = bracketend(cc) - (1 + LINK_SIZE); + if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) + { + possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; + /* This is correct regardless of common->capture_last_ptr. */ + capture_last_found = TRUE; + } + cc = next_opcode(common, cc); } -cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); while (cc < ccend) switch(*cc) { case OP_SET_SOM: - case OP_RECURSE: + SLJIT_ASSERT(common->has_set_som); + stack_restore = TRUE; if (!setsom_found) { length += 2; setsom_found = TRUE; } - cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE; + cc += 1; break; + case OP_MARK: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + stack_restore = TRUE; + if (!setmark_found) + { + length += 2; + setmark_found = TRUE; + } + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + stack_restore = TRUE; + if (common->has_set_som && !setsom_found) + { + length += 2; + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + length += 2; + setmark_found = TRUE; + } + if (common->capture_last_ptr != 0 && !capture_last_found) + { + length += 2; + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + case OP_CBRA: case OP_CBRAPOS: case OP_SCBRA: case OP_SCBRAPOS: + stack_restore = TRUE; + if (common->capture_last_ptr != 0 && !capture_last_found) + { + length += 2; + capture_last_found = TRUE; + } length += 3; cc += 1 + LINK_SIZE + IMM2_SIZE; break; default: + stack_restore = TRUE; + /* Fall through. */ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); break; } /* Possessive quantifiers can use a special case. */ -if (SLJIT_UNLIKELY(possessive) && length == 3) - return -1; +if (SLJIT_UNLIKELY(possessive == length)) + return stack_restore ? no_frame : no_stack; if (length > 0) return length + 1; -return -1; +return stack_restore ? no_frame : no_stack; } -static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive) +static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive) { DEFINE_COMPILER; -pcre_uchar *ccend = bracketend(cc); -BOOL setsom_found = FALSE; +BOOL setsom_found = recursive; +BOOL setmark_found = recursive; +/* The last capture is a local variable even for recursions. */ +BOOL capture_last_found = FALSE; int offset; /* >= 1 + shortest item size (2) */ @@ -800,39 +1375,100 @@ SLJIT_UNUSED_ARG(stacktop); SLJIT_ASSERT(stackpos >= stacktop + 2); stackpos = STACK(stackpos); -if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) - cc = next_opcode(common, cc); +if (ccend == NULL) + { + ccend = bracketend(cc) - (1 + LINK_SIZE); + if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) + cc = next_opcode(common, cc); + } + SLJIT_ASSERT(cc != NULL); while (cc < ccend) switch(*cc) { case OP_SET_SOM: - case OP_RECURSE: + SLJIT_ASSERT(common->has_set_som); if (!setsom_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin); - stackpos += (int)sizeof(sljit_w); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); + stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_w); + stackpos += (int)sizeof(sljit_sw); setsom_found = TRUE; } - cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE; + cc += 1; break; + case OP_MARK: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + if (!setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); + stackpos += (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_sw); + setmark_found = TRUE; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + if (common->has_set_som && !setsom_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); + stackpos += (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_sw); + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); + stackpos += (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_sw); + setmark_found = TRUE; + } + if (common->capture_last_ptr != 0 && !capture_last_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); + stackpos += (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_sw); + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + case OP_CBRA: case OP_CBRAPOS: case OP_SCBRA: case OP_SCBRAPOS: + if (common->capture_last_ptr != 0 && !capture_last_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); + stackpos += (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos += (int)sizeof(sljit_sw); + capture_last_found = TRUE; + } offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); - stackpos += (int)sizeof(sljit_w); + stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos += (int)sizeof(sljit_w); + stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); - stackpos += (int)sizeof(sljit_w); + stackpos += (int)sizeof(sljit_sw); cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -843,19 +1479,27 @@ while (cc < ccend) break; } -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); SLJIT_ASSERT(stackpos == STACK(stacktop)); } -static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) +static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head) { -int localsize = 2; +int private_data_length = needs_control_head ? 3 : 2; +int size; pcre_uchar *alternative; -/* Calculate the sum of the local variables. */ +/* Calculate the sum of the private machine words. */ while (cc < ccend) { + size = 0; switch(*cc) { + case OP_KET: + if (PRIVATE_DATA(cc) != 0) + private_data_length++; + cc += 1 + LINK_SIZE; + break; + case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: @@ -866,19 +1510,20 @@ while (cc < ccend) case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - localsize++; + private_data_length++; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - localsize++; + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + private_data_length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - localsize += 2; + private_data_length += 2; cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -886,10 +1531,68 @@ while (cc < ccend) /* Might be a hidden SCOND. */ alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) - localsize++; + private_data_length++; cc += 1 + LINK_SIZE; break; + CASE_ITERATOR_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc)) + private_data_length++; + cc += 2; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc)) + private_data_length += 2; + cc += 2; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc)) + private_data_length += 2; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc)) + private_data_length++; + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc)) + private_data_length += 2; + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc)) + private_data_length += 2; + cc += 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + case OP_XCLASS: + size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); +#else + size = 1 + 32 / (int)sizeof(pcre_uchar); +#endif + if (PRIVATE_DATA(cc)) + private_data_length += get_class_iterator_size(cc + size); + cc += size; + break; + default: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); @@ -897,15 +1600,15 @@ while (cc < ccend) } } SLJIT_ASSERT(cc == ccend); -return localsize; +return private_data_length; } -static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, - BOOL save, int stackptr, int stacktop) +static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, + BOOL save, int stackptr, int stacktop, BOOL needs_control_head) { DEFINE_COMPILER; int srcw[2]; -int count; +int count, size; BOOL tmp1next = TRUE; BOOL tmp1empty = TRUE; BOOL tmp2empty = TRUE; @@ -922,31 +1625,37 @@ stacktop = STACK(stacktop - 1); if (!save) { - stackptr += sizeof(sljit_w); + stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw); if (stackptr < stacktop) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); tmp1empty = FALSE; } if (stackptr < stacktop) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); tmp2empty = FALSE; } /* The tmp1next must be TRUE in either way. */ } -while (status != end) +do { count = 0; switch(status) { case start: - SLJIT_ASSERT(save); + SLJIT_ASSERT(save && common->recursive_head_ptr != 0); count = 1; - srcw[0] = RECURSIVE_HEAD; + srcw[0] = common->recursive_head_ptr; + if (needs_control_head) + { + SLJIT_ASSERT(common->control_head_ptr != 0); + count = 2; + srcw[1] = common->control_head_ptr; + } status = loop; break; @@ -959,6 +1668,15 @@ while (status != end) switch(*cc) { + case OP_KET: + if (PRIVATE_DATA(cc) != 0) + { + count = 1; + srcw[0] = PRIVATE_DATA(cc); + } + cc += 1 + LINK_SIZE; + break; + case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: @@ -970,24 +1688,27 @@ while (status != end) case OP_SBRAPOS: case OP_SCOND: count = 1; - srcw[0] = PRIV_DATA(cc); + srcw[0] = PRIVATE_DATA(cc); SLJIT_ASSERT(srcw[0] != 0); cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - count = 1; - srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + { + count = 1; + srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + } cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: count = 2; + srcw[0] = PRIVATE_DATA(cc); srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); - srcw[0] = PRIV_DATA(cc); - SLJIT_ASSERT(srcw[0] != 0); + SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0); cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -997,12 +1718,108 @@ while (status != end) if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { count = 1; - srcw[0] = PRIV_DATA(cc); + srcw[0] = PRIVATE_DATA(cc); SLJIT_ASSERT(srcw[0] != 0); } cc += 1 + LINK_SIZE; break; + CASE_ITERATOR_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc)) + { + count = 1; + srcw[0] = PRIVATE_DATA(cc); + } + cc += 2; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc)) + { + count = 2; + srcw[0] = PRIVATE_DATA(cc); + srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + } + cc += 2; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc)) + { + count = 2; + srcw[0] = PRIVATE_DATA(cc); + srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + } + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc)) + { + count = 1; + srcw[0] = PRIVATE_DATA(cc); + } + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc)) + { + count = 2; + srcw[0] = PRIVATE_DATA(cc); + srcw[1] = srcw[0] + sizeof(sljit_sw); + } + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc)) + { + count = 2; + srcw[0] = PRIVATE_DATA(cc); + srcw[1] = srcw[0] + sizeof(sljit_sw); + } + cc += 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + case OP_XCLASS: + size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); +#else + size = 1 + 32 / (int)sizeof(pcre_uchar); +#endif + if (PRIVATE_DATA(cc)) + switch(get_class_iterator_size(cc + size)) + { + case 1: + count = 1; + srcw[0] = PRIVATE_DATA(cc); + break; + + case 2: + count = 2; + srcw[0] = PRIVATE_DATA(cc); + srcw[1] = srcw[0] + sizeof(sljit_sw); + break; + + default: + SLJIT_ASSERT_STOP(); + break; + } + cc += size; + break; + default: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); @@ -1025,7 +1842,7 @@ while (status != end) if (!tmp1empty) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]); tmp1empty = FALSE; @@ -1036,7 +1853,7 @@ while (status != end) if (!tmp2empty) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]); tmp2empty = FALSE; @@ -1053,7 +1870,7 @@ while (status != end) if (!tmp1empty) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } tmp1next = FALSE; } @@ -1065,13 +1882,14 @@ while (status != end) if (!tmp2empty) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } tmp1next = TRUE; } } } } +while (status != end); if (save) { @@ -1080,12 +1898,12 @@ if (save) if (!tmp1empty) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } if (!tmp2empty) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } } else @@ -1093,20 +1911,60 @@ if (save) if (!tmp2empty) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } if (!tmp1empty) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); - stackptr += sizeof(sljit_w); + stackptr += sizeof(sljit_sw); } } } SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); } -static SLJIT_INLINE BOOL ispowerof2(unsigned int value) +static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset) { +pcre_uchar *end = bracketend(cc); +BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; + +/* Assert captures then. */ +if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) + current_offset = NULL; +/* Conditional block does not. */ +if (*cc == OP_COND || *cc == OP_SCOND) + has_alternatives = FALSE; + +cc = next_opcode(common, cc); +if (has_alternatives) + current_offset = common->then_offsets + (cc - common->start); + +while (cc < end) + { + if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) + cc = set_then_offsets(common, cc, current_offset); + else + { + if (*cc == OP_ALT && has_alternatives) + current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); + if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) + *current_offset = 1; + cc = next_opcode(common, cc); + } + } + +return end; +} + +#undef CASE_ITERATOR_PRIVATE_DATA_1 +#undef CASE_ITERATOR_PRIVATE_DATA_2A +#undef CASE_ITERATOR_PRIVATE_DATA_2B +#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 +#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A +#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + +static SLJIT_INLINE BOOL is_powerof2(unsigned int value) +{ return (value & (value - 1)) == 0; } @@ -1115,8 +1973,8 @@ static SLJIT_INLINE void set_jumps(jump_list *list, st while (list) { /* sljit_set_label is clever enough to do nothing - if either the jump or the label is NULL */ - sljit_set_label(list->jump, label); + if either the jump or the label is NULL. */ + SET_LABEL(list->jump, label); list = list->next; } } @@ -1132,17 +1990,15 @@ if (list_item) } } -static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start) +static void add_stub(compiler_common *common, struct sljit_jump *start) { DEFINE_COMPILER; stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); if (list_item) { - list_item->type = type; - list_item->data = data; list_item->start = start; - list_item->leave = LABEL(); + list_item->quit = LABEL(); list_item->next = common->stubs; common->stubs = list_item; } @@ -1156,23 +2012,18 @@ stub_list* list_item = common->stubs; while (list_item) { JUMPHERE(list_item->start); - switch(list_item->type) - { - case stack_alloc: - add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); - break; - } - JUMPTO(SLJIT_JUMP, list_item->leave); + add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); + JUMPTO(SLJIT_JUMP, list_item->quit); list_item = list_item->next; } common->stubs = NULL; } -static SLJIT_INLINE void decrease_call_count(compiler_common *common) +static SLJIT_INLINE void count_match(compiler_common *common) { DEFINE_COMPILER; -OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1); +OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO)); } @@ -1181,7 +2032,7 @@ static SLJIT_INLINE void allocate_stack(compiler_commo /* May destroy all locals and registers except TMP2. */ DEFINE_COMPILER; -OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w)); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); #ifdef DESTROY_REGISTERS OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); @@ -1189,13 +2040,13 @@ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); #endif -add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); +add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); } static SLJIT_INLINE void free_stack(compiler_common *common, int size) { DEFINE_COMPILER; -OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w)); +OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); } static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) @@ -1203,71 +2054,201 @@ static SLJIT_INLINE void reset_ovector(compiler_common DEFINE_COMPILER; struct sljit_label *loop; int i; + /* At this point we can freely use all temporary registers. */ +SLJIT_ASSERT(length > 1); /* TMP1 returns with begin - 1. */ -OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); if (length < 8) { - for (i = 0; i < length; i++) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0); + for (i = 1; i < length; i++) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0); } else { - OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w)); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length); + GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START); + OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1); loop = LABEL(); - OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, loop); } } +static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +int i; + +SLJIT_ASSERT(length > 1); +/* OVECTOR(1) contains the "string begin - 1" constant. */ +if (length > 2) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); +if (length < 8) + { + for (i = 2; i < length; i++) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0); + } +else + { + GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); + loop = LABEL(); + OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); + OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, loop); + } + +OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0); +if (common->control_head_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base)); +} + +static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg) +{ +while (current != NULL) + { + switch (current[-2]) + { + case type_then_trap: + break; + + case type_mark: + if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0) + return current[-4]; + break; + + default: + SLJIT_ASSERT_STOP(); + break; + } + current = (sljit_sw*)current[-1]; + } +return -1; +} + static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket) { DEFINE_COMPILER; struct sljit_label *loop; -struct sljit_jump *earlyexit; +struct sljit_jump *early_quit; /* At this point we can freely use all registers. */ OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0); -OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount)); -OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); -OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START); +OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); +OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count)); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0); +OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); +OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); +GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START); /* Unlikely, but possible */ -earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0); +early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0); loop = LABEL(); -OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0); -OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w)); +OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0); +OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)); /* Copy the integer value to the output buffer */ -#ifdef COMPILE_PCRE16 -OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT); #endif -OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0); +OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, loop); -JUMPHERE(earlyexit); +JUMPHERE(early_quit); /* Calculate the return value, which is the maximum ovector value. */ if (topbracket > 1) { - OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w)); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1); + GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1); /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */ loop = LABEL(); - OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w))); - OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); - CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop); - OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0); + OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw))); + OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1); + CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0); } else OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); } +static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2); +SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 + && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0)); + +OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); +OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count)); +CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit); + +/* Store match begin and end. */ +OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets)); + +jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3); +OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0); +JUMPHERE(jump); + +OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); +OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0); + +OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0); + +JUMPTO(SLJIT_JUMP, quit); +} + +static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) +{ +/* May destroy TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + { + /* The value of -1 must be kept for start_used_ptr! */ + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1); + /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting + is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */ + jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +else if (common->mode == JIT_PARTIAL_HARD_COMPILE) + { + jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +} + static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) { /* Detects if the character has an othercase. */ @@ -1352,10 +2333,10 @@ if (c <= 127 && bit == 0x20) return (0 << 8) | 0x20; /* Since c != oc, they must have at least 1 bit difference. */ -if (!ispowerof2(bit)) +if (!is_powerof2(bit)) return 0; -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 #ifdef SUPPORT_UTF if (common->utf && c > 127) @@ -1371,9 +2352,8 @@ if (common->utf && c > 127) #endif /* SUPPORT_UTF */ return (0 << 8) | bit; -#else /* COMPILE_PCRE8 */ +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -#ifdef COMPILE_PCRE16 #ifdef SUPPORT_UTF if (common->utf && c > 65535) { @@ -1384,41 +2364,121 @@ if (common->utf && c > 65535) } #endif /* SUPPORT_UTF */ return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); -#endif /* COMPILE_PCRE16 */ -#endif /* COMPILE_PCRE8 */ +#endif /* COMPILE_PCRE[8|16|32] */ } -static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks) +static void check_partial(compiler_common *common, BOOL force) { +/* Checks whether a partial matching is occurred. Does not modify registers. */ DEFINE_COMPILER; -add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); +struct sljit_jump *jump = NULL; + +SLJIT_ASSERT(!force || common->mode != JIT_COMPILE); + +if (common->mode == JIT_COMPILE) + return; + +if (!force) + jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); +else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); + +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); +else + { + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } + +if (jump != NULL) + JUMPHERE(jump); } +static void check_str_end(compiler_common *common, jump_list **end_reached) +{ +/* Does not affect registers. Usually used in a tight spot. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == JIT_COMPILE) + { + add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + return; + } + +jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + { + add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + add_jump(compiler, end_reached, JUMP(SLJIT_JUMP)); + } +else + { + add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } +JUMPHERE(jump); +} + +static void detect_partial_match(compiler_common *common, jump_list **backtracks) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == JIT_COMPILE) + { + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + return; + } + +/* Partial matching mode. */ +jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); +add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); +if (common->mode == JIT_PARTIAL_SOFT_COMPILE) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + } +else + { + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } +JUMPHERE(jump); +} + static void read_char(compiler_common *common) { /* Reads the character into TMP1, updates STR_PTR. Does not check STR_END. TMP2 Destroyed. */ DEFINE_COMPILER; -#ifdef SUPPORT_UTF +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 struct sljit_jump *jump; #endif OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#ifdef SUPPORT_UTF +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (common->utf) { -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); -#endif -#endif /* COMPILE_PCRE8 */ +#endif /* COMPILE_PCRE[8|16] */ add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); JUMPHERE(jump); } -#endif +#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } @@ -1427,33 +2487,31 @@ static void peek_char(compiler_common *common) /* Reads the character into TMP1, keeps STR_PTR. Does not check STR_END. TMP2 Destroyed. */ DEFINE_COMPILER; -#ifdef SUPPORT_UTF +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 struct sljit_jump *jump; #endif OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#ifdef SUPPORT_UTF +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 if (common->utf) { -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); -#endif -#endif /* COMPILE_PCRE8 */ +#endif /* COMPILE_PCRE[8|16] */ add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); JUMPHERE(jump); } -#endif +#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ } static void read_char8_type(compiler_common *common) { /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ DEFINE_COMPILER; -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 struct sljit_jump *jump; #endif @@ -1462,15 +2520,14 @@ if (common->utf) { OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 /* This can be an extra read in some situations, but hopefully it is needed in most cases. */ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); JUMPHERE(jump); -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); @@ -1478,23 +2535,27 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -#endif -#endif /* COMPILE_PCRE8 */ +#elif defined COMPILE_PCRE32 + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump); +#endif /* COMPILE_PCRE[8|16|32] */ return; } -#endif +#endif /* SUPPORT_UTF */ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#ifdef COMPILE_PCRE16 +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 /* The ctypes array contains only 256 values. */ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); #endif OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); -#ifdef COMPILE_PCRE16 +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 JUMPHERE(jump); #endif } @@ -1503,7 +2564,8 @@ static void skip_char_back(compiler_common *common) { /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */ DEFINE_COMPILER; -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +#if defined COMPILE_PCRE8 struct sljit_label *label; if (common->utf) @@ -1515,8 +2577,7 @@ if (common->utf) CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); return; } -#endif -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 if (common->utf) { OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); @@ -1524,16 +2585,17 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); return; } -#endif +#endif /* COMPILE_PCRE[8|16] */ +#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } -static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue) +static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue) { /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ DEFINE_COMPILER; @@ -1541,26 +2603,26 @@ DEFINE_COMPILER; if (nltype == NLTYPE_ANY) { add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); } else if (nltype == NLTYPE_ANYCRLF) { OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); } else { SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); - add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } } #ifdef SUPPORT_UTF -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 static void do_utfreadchar(compiler_common *common) { /* Fast decoding a UTF-8 character. TMP1 contains the first byte @@ -1568,7 +2630,7 @@ of the character (>= 0xc0). Return char value in TMP1, DEFINE_COMPILER; struct sljit_jump *jump; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); /* Searching for the first zero. */ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_C_NOT_ZERO); @@ -1627,7 +2689,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *compare; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_C_NOT_ZERO); @@ -1648,15 +2710,14 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(jump); /* We only have types for characters less than 256. */ -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0); +OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } -#else /* COMPILE_PCRE8 */ +#elif defined COMPILE_PCRE16 -#ifdef COMPILE_PCRE16 static void do_utfreadchar(compiler_common *common) { /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char @@ -1664,7 +2725,7 @@ of the character (>= 0xd800). Return char value in TMP DEFINE_COMPILER; struct sljit_jump *jump; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); /* Do nothing, only return. */ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -1681,9 +2742,8 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } -#endif /* COMPILE_PCRE16 */ -#endif /* COMPILE_PCRE8 */ +#endif /* COMPILE_PCRE[8|16] */ #endif /* SUPPORT_UTF */ @@ -1701,15 +2761,15 @@ DEFINE_COMPILER; SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1)); +OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -1723,7 +2783,7 @@ struct sljit_label *newlinelabel = NULL; struct sljit_jump *start; struct sljit_jump *end = NULL; struct sljit_jump *nl = NULL; -#ifdef SUPPORT_UTF +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 struct sljit_jump *singlechar; #endif jump_list *newline = NULL; @@ -1737,8 +2797,8 @@ if (!(hascrorlf || firstline) && (common->nltype == NL if (firstline) { /* Search for the end of the first line. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0); + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { @@ -1749,23 +2809,24 @@ if (firstline) OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(end); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else { end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); mainloop = LABEL(); /* Continual stores does not cause data dependency. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); read_char(common); check_newlinechar(common, common->nltype, &newline, TRUE); CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0); + JUMPHERE(end); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); set_jumps(newline, LABEL()); } - JUMPHERE(end); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); } start = JUMP(SLJIT_JUMP); @@ -1777,9 +2838,9 @@ if (newlinecheck) end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); -#ifdef COMPILE_PCRE16 - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); nl = JUMP(SLJIT_JUMP); @@ -1800,27 +2861,28 @@ if (newlinecheck) CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +#if defined COMPILE_PCRE8 if (common->utf) { singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(singlechar); } -#endif -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 if (common->utf) { singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(singlechar); } -#endif +#endif /* COMPILE_PCRE[8|16] */ +#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ JUMPHERE(start); if (newlinecheck) @@ -1832,22 +2894,192 @@ if (newlinecheck) return mainloop; } +#define MAX_N_CHARS 3 + +static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit; +pcre_uint32 chars[MAX_N_CHARS * 2]; +pcre_uchar *cc = common->start + 1 + LINK_SIZE; +int location = 0; +pcre_int32 len, c, bit, caseless; +int must_stop; + +/* We do not support alternatives now. */ +if (*(common->start + GET(common->start, 1)) == OP_ALT) + return FALSE; + +while (TRUE) + { + caseless = 0; + must_stop = 1; + switch(*cc) + { + case OP_CHAR: + must_stop = 0; + cc++; + break; + + case OP_CHARI: + caseless = 1; + must_stop = 0; + cc++; + break; + + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + cc++; + break; + + case OP_EXACT: + cc += 1 + IMM2_SIZE; + break; + + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + caseless = 1; + cc++; + break; + + case OP_EXACTI: + caseless = 1; + cc += 1 + IMM2_SIZE; + break; + + default: + must_stop = 2; + break; + } + + if (must_stop == 2) + break; + + len = 1; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]); +#endif + + if (caseless && char_has_othercase(common, cc)) + { + caseless = char_get_othercase_bit(common, cc); + if (caseless == 0) + return FALSE; +#ifdef COMPILE_PCRE8 + caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8)); +#else + if ((caseless & 0x100) != 0) + caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9)); + else + caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9)); +#endif + } + else + caseless = 0; + + while (len > 0 && location < MAX_N_CHARS * 2) + { + c = *cc; + bit = 0; + if (len == (caseless & 0xff)) + { + bit = caseless >> 8; + c |= bit; + } + + chars[location] = c; + chars[location + 1] = bit; + + len--; + location += 2; + cc++; + } + + if (location >= MAX_N_CHARS * 2 || must_stop != 0) + break; + } + +/* At least two characters are required. */ +if (location < 2 * 2) + return FALSE; + +if (firstline) + { + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + } +else + OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + +start = LABEL(); +quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +if (chars[1] != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]); +CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start); +if (location > 2 * 2) + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +if (chars[3] != 0) + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]); +CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start); +if (location > 2 * 2) + { + if (chars[5] != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]); + CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start); + } +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +JUMPHERE(quit); + +if (firstline) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +else + OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); +return TRUE; +} + +#undef MAX_N_CHARS + static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline) { DEFINE_COMPILER; struct sljit_label *start; -struct sljit_jump *leave; +struct sljit_jump *quit; struct sljit_jump *found; pcre_uchar oc, bit; if (firstline) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); } start = LABEL(); -leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); oc = first_char; @@ -1864,7 +3096,7 @@ if (first_char == oc) else { bit = first_char ^ oc; - if (ispowerof2(bit)) + if (is_powerof2(bit)) { OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit); @@ -1872,39 +3104,20 @@ else else { OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); found = JUMP(SLJIT_C_NOT_ZERO); } } OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 -if (common->utf) - { - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - } -#endif -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 -if (common->utf) - { - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - } -#endif JUMPTO(SLJIT_JUMP, start); JUMPHERE(found); -JUMPHERE(leave); +JUMPHERE(quit); if (firstline) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); } static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline) @@ -1913,15 +3126,16 @@ DEFINE_COMPILER; struct sljit_label *loop; struct sljit_jump *lastchar; struct sljit_jump *firstchar; -struct sljit_jump *leave; +struct sljit_jump *quit; struct sljit_jump *foundcr = NULL; struct sljit_jump *notfoundnl; jump_list *newline = NULL; if (firstline) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); } if (common->nltype == NLTYPE_FIXED && common->newline > 255) @@ -1934,21 +3148,21 @@ if (common->nltype == NLTYPE_FIXED && common->newline OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL); -#ifdef COMPILE_PCRE16 - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); #endif OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); loop = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); - JUMPHERE(leave); + JUMPHERE(quit); JUMPHERE(firstchar); JUMPHERE(lastchar); @@ -1972,91 +3186,108 @@ set_jumps(newline, loop); if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) { - leave = JUMP(SLJIT_JUMP); + quit = JUMP(SLJIT_JUMP); JUMPHERE(foundcr); notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); -#ifdef COMPILE_PCRE16 - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(notfoundnl); - JUMPHERE(leave); + JUMPHERE(quit); } JUMPHERE(lastchar); JUMPHERE(firstchar); if (firstline) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); } +static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks); + static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) { DEFINE_COMPILER; struct sljit_label *start; -struct sljit_jump *leave; -struct sljit_jump *found; +struct sljit_jump *quit; +struct sljit_jump *found = NULL; +jump_list *matches = NULL; +pcre_uint8 inverted_start_bits[32]; +int i; #ifndef COMPILE_PCRE8 struct sljit_jump *jump; #endif +for (i = 0; i < 32; ++i) + inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]); + if (firstline) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); } start = LABEL(); -leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); #ifdef SUPPORT_UTF if (common->utf) OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); #endif + +if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches)) + { #ifndef COMPILE_PCRE8 -jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); -JUMPHERE(jump); + jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); + JUMPHERE(jump); #endif -OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); -OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); -found = JUMP(SLJIT_C_NOT_ZERO); + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + found = JUMP(SLJIT_C_NOT_ZERO); + } #ifdef SUPPORT_UTF if (common->utf) OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); #endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +#ifdef SUPPORT_UTF +#if defined COMPILE_PCRE8 if (common->utf) { CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } -#endif -#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 if (common->utf) { CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } -#endif +#endif /* COMPILE_PCRE[8|16] */ +#endif /* SUPPORT_UTF */ JUMPTO(SLJIT_JUMP, start); -JUMPHERE(found); -JUMPHERE(leave); +if (found != NULL) + JUMPHERE(found); +if (matches != NULL) + set_jumps(matches, LABEL()); +JUMPHERE(quit); if (firstline) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); + OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); } static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar) @@ -2068,9 +3299,10 @@ struct sljit_jump *alreadyfound; struct sljit_jump *found; struct sljit_jump *foundoc = NULL; struct sljit_jump *notfound; -pcre_uchar oc, bit; +pcre_uint32 oc, bit; -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR); +SLJIT_ASSERT(common->req_char_ptr != 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); @@ -2098,7 +3330,7 @@ if (req_char == oc) else { bit = req_char ^ oc; - if (ispowerof2(bit)) + if (is_powerof2(bit)) { OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); @@ -2115,7 +3347,7 @@ JUMPTO(SLJIT_JUMP, loop); JUMPHERE(found); if (foundoc) JUMPHERE(foundoc); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); JUMPHERE(alreadyfound); JUMPHERE(toolong); return notfound; @@ -2127,55 +3359,54 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *mainloop; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0); +GET_LOCAL_BASE(TMP3, 0, 0); /* Drop frames until we reach STACK_TOP. */ mainloop = LABEL(); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); -jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); -OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w)); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w)); +OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0); +jump = JUMP(SLJIT_C_SIG_LESS_EQUAL); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); JUMPTO(SLJIT_JUMP, mainloop); JUMPHERE(jump); -jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); +jump = JUMP(SLJIT_C_SIG_LESS); /* End of dropping frames. */ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(jump); -jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin); -/* Set string begin. */ -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w)); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); +OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); JUMPTO(SLJIT_JUMP, mainloop); - -JUMPHERE(jump); -/* Unknown command. */ -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w)); -JUMPTO(SLJIT_JUMP, mainloop); } static void check_wordboundary(compiler_common *common) { DEFINE_COMPILER; -struct sljit_jump *beginend; +struct sljit_jump *skipread; +jump_list *skipread_list = NULL; #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF struct sljit_jump *jump; #endif SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); /* Get type of the previous char, and put it to LOCALS1. */ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0); -beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); +skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); skip_char_back(common); +check_start_used_ptr(common); read_char(common); /* Testing char type. */ @@ -2187,10 +3418,10 @@ if (common->use_ucp) add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); JUMPHERE(jump); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); } @@ -2216,10 +3447,10 @@ else JUMPHERE(jump); #endif /* COMPILE_PCRE8 */ } -JUMPHERE(beginend); +JUMPHERE(skipread); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); -beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +check_str_end(common, &skipread_list); peek_char(common); /* Testing char type. This is a code duplication. */ @@ -2231,10 +3462,10 @@ if (common->use_ucp) add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); JUMPHERE(jump); } else @@ -2260,36 +3491,175 @@ else JUMPHERE(jump); #endif /* COMPILE_PCRE8 */ } -JUMPHERE(beginend); +set_jumps(skipread_list, LABEL()); OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); } +/* + range format: + + ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range). + ranges[1] = first bit (0 or 1) + ranges[2-length] = position of the bit change (when the current bit is not equal to the previous) +*/ + +static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (ranges[0] < 0) + return FALSE; + +switch(ranges[0]) + { + case 1: + if (readch) + read_char(common); + add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + return TRUE; + + case 2: + if (readch) + read_char(common); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); + add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + return TRUE; + + case 4: + if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5]) + { + if (readch) + read_char(common); + if (ranges[1] != 0) + { + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); + } + else + { + jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); + JUMPHERE(jump); + } + return TRUE; + } + if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2])) + { + if (readch) + read_char(common); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]); + add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4])); + return TRUE; + } + return FALSE; + + default: + return FALSE; + } +} + +static void get_ctype_ranges(compiler_common *common, int flag, int *ranges) +{ +int i, bit, length; +const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes; + +bit = ctypes[0] & flag; +ranges[0] = -1; +ranges[1] = bit != 0 ? 1 : 0; +length = 0; + +for (i = 1; i < 256; i++) + if ((ctypes[i] & flag) != bit) + { + if (length >= MAX_RANGE_SIZE) + return; + ranges[2 + length] = i; + length++; + bit ^= flag; + } + +if (bit != 0) + { + if (length >= MAX_RANGE_SIZE) + return; + ranges[2 + length] = 256; + length++; + } +ranges[0] = length; +} + +static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks) +{ +int ranges[2 + MAX_RANGE_SIZE]; +pcre_uint8 bit, cbit, all; +int i, byte, length = 0; + +bit = bits[0] & 0x1; +ranges[1] = bit; +/* Can be 0 or 255. */ +all = -bit; + +for (i = 0; i < 256; ) + { + byte = i >> 3; + if ((i & 0x7) == 0 && bits[byte] == all) + i += 8; + else + { + cbit = (bits[byte] >> (i & 0x7)) & 0x1; + if (cbit != bit) + { + if (length >= MAX_RANGE_SIZE) + return FALSE; + ranges[2 + length] = i; + length++; + bit = cbit; + all = -cbit; + } + i++; + } + } + +if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) + { + if (length >= MAX_RANGE_SIZE) + return FALSE; + ranges[2 + length] = 256; + length++; + } +ranges[0] = length; + +return check_ranges(common, ranges, backtracks, FALSE); +} + static void check_anynewline(compiler_common *common) { /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); -COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 #ifdef COMPILE_PCRE8 if (common->utf) { #endif - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #ifdef COMPILE_PCRE8 } #endif -#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ -COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); +#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -2298,36 +3668,36 @@ static void check_hspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); -COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); -COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 #ifdef COMPILE_PCRE8 if (common->utf) { #endif - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); #ifdef COMPILE_PCRE8 } #endif -#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ -COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); +#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -2337,25 +3707,25 @@ static void check_vspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); -COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 #ifdef COMPILE_PCRE8 if (common->utf) { #endif - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #ifdef COMPILE_PCRE8 } #endif -#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ -COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); +#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -2369,7 +3739,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *label; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0); @@ -2398,7 +3768,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *label; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); @@ -2441,20 +3811,31 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); #if defined SUPPORT_UTF && defined SUPPORT_UCP -static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) +static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) { /* This function would be ineffective to do in JIT level. */ -int c1, c2; -const pcre_uchar *src2 = args->ptr; +pcre_uint32 c1, c2; +const pcre_uchar *src2 = args->uchar_ptr; const pcre_uchar *end2 = args->end; +const ucd_record *ur; +const pcre_uint32 *pp; while (src1 < end1) { if (src2 >= end2) - return 0; + return (pcre_uchar*)1; GETCHARINC(c1, src1); GETCHARINC(c2, src2); - if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return 0; + ur = GET_UCD(c2); + if (c1 != c2 && c1 != c2 + ur->other_case) + { + pp = PRIV(ucd_caseless_sets) + ur->caseset; + for (;;) + { + if (c1 < *pp) return NULL; + if (c1 == *pp++) break; + } + } } return src2; } @@ -2462,7 +3843,7 @@ return src2; #endif /* SUPPORT_UTF && SUPPORT_UCP */ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, - compare_context* context, jump_list **fallbacks) + compare_context* context, jump_list **backtracks) { DEFINE_COMPILER; unsigned int othercasebit = 0; @@ -2476,23 +3857,25 @@ if (caseless && char_has_othercase(common, cc)) othercasebit = char_get_othercase_bit(common, cc); SLJIT_ASSERT(othercasebit); /* Extracting bit difference info. */ -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 othercasechar = cc + (othercasebit >> 8); othercasebit &= 0xff; -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + /* Note that this code only handles characters in the BMP. If there + ever are characters outside the BMP whose othercase differs in only one + bit from itself (there currently are none), this code will need to be + revised for COMPILE_PCRE32. */ othercasechar = cc + (othercasebit >> 9); if ((othercasebit & 0x100) != 0) othercasebit = (othercasebit & 0xff) << 8; else othercasebit &= 0xff; -#endif -#endif +#endif /* COMPILE_PCRE[8|16|32] */ } if (context->sourcereg == -1) { -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); @@ -2501,16 +3884,16 @@ if (context->sourcereg == -1) else #endif OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#else -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif - OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif -#endif /* COMPILE_PCRE8 */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif defined COMPILE_PCRE32 + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* COMPILE_PCRE[8|16|32] */ context->sourcereg = TMP2; } @@ -2524,7 +3907,7 @@ do #endif context->length -= IN_UCHARS(1); -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) /* Unaligned read is supported. */ if (othercasebit != 0 && othercasechar == cc) @@ -2539,7 +3922,7 @@ do } context->ucharptr++; -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) #else if (context->ucharptr >= 2 || context->length == 0) @@ -2547,15 +3930,12 @@ do { if (context->length >= 4) OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#ifdef COMPILE_PCRE8 else if (context->length >= 2) OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#if defined COMPILE_PCRE8 else if (context->length >= 1) OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#else - else if (context->length >= 2) - OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif +#endif /* COMPILE_PCRE8 */ context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; switch(context->ucharptr) @@ -2563,20 +3943,20 @@ do case 4 / sizeof(pcre_uchar): if (context->oc.asint != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); break; case 2 / sizeof(pcre_uchar): if (context->oc.asushort != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); break; #ifdef COMPILE_PCRE8 case 1: if (context->oc.asbyte != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); break; #endif @@ -2589,23 +3969,19 @@ do #else - /* Unaligned read is unsupported. */ -#ifdef COMPILE_PCRE8 - if (context->length > 0) - OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#else - if (context->length > 0) - OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif + /* Unaligned read is unsupported or in 32 bit mode. */ + if (context->length >= 1) + OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; if (othercasebit != 0 && othercasechar == cc) { OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); } else - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); #endif @@ -2641,26 +4017,27 @@ return cc; } \ charoffset = (value); -static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks) +static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; jump_list *found = NULL; -jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks; -unsigned int c; -int compares; +jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; +pcre_int32 c, charoffset; struct sljit_jump *jump = NULL; pcre_uchar *ccbegin; +int compares, invertcmp, numberofcmps; + #ifdef SUPPORT_UCP BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; BOOL charsaved = FALSE; int typereg = TMP1, scriptreg = TMP1; -unsigned int typeoffset; +const pcre_uint32 *other_cases; +pcre_int32 typeoffset; #endif -int invertcmp, numberofcmps; -unsigned int charoffset; -/* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */ -check_input_end(common, fallbacks); +/* Although SUPPORT_UTF must be defined, we are + not necessary in utf mode even in 8 bit mode. */ +detect_partial_match(common, backtracks); read_char(common); if ((*cc++ & XCL_MAP) != 0) @@ -2673,12 +4050,15 @@ if ((*cc++ & XCL_MAP) != 0) jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); + if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list)) + { + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); + } #ifndef COMPILE_PCRE8 JUMPHERE(jump); @@ -2747,10 +4127,18 @@ while (*cc != XCL_END) case PT_SPACE: case PT_PXSPACE: case PT_WORD: + case PT_PXGRAPH: + case PT_PXPRINT: + case PT_PXPUNCT: needstype = TRUE; needschar = TRUE; break; + case PT_CLIST: + case PT_UCNC: + needschar = TRUE; + break; + default: SLJIT_ASSERT_STOP(); break; @@ -2787,13 +4175,13 @@ if (needstype || needsscript) { if (scriptreg == TMP1) { - OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); } else { OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); } } @@ -2811,7 +4199,7 @@ typeoffset = 0; while (*cc != XCL_END) { compares--; - invertcmp = (compares == 0 && list != fallbacks); + invertcmp = (compares == 0 && list != backtracks); jump = NULL; if (*cc == XCL_SINGLE) @@ -2829,13 +4217,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); numberofcmps = 0; } @@ -2868,13 +4256,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); numberofcmps = 0; } @@ -2893,7 +4281,7 @@ while (*cc != XCL_END) switch(*cc) { case PT_ANY: - if (list != fallbacks) + if (list != backtracks) { if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0)) continue; @@ -2905,11 +4293,11 @@ while (*cc != XCL_END) case PT_LAMP: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); - COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); break; @@ -2929,44 +4317,174 @@ while (*cc != XCL_END) case PT_SPACE: case PT_PXSPACE: - if (*cc == PT_SPACE) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset); - } SET_CHAR_OFFSET(9); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); - if (*cc == PT_SPACE) - JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + SET_TYPE_OFFSET(ucp_Zl); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); break; case PT_WORD: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); - /* ... fall through */ + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + /* Fall through. */ case PT_ALNUM: SET_TYPE_OFFSET(ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); SET_TYPE_OFFSET(ucp_Nd); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); break; + + case PT_CLIST: + other_cases = PRIV(ucd_caseless_sets) + cc[1]; + + /* At least three characters are required. + Otherwise this case would be handled by the normal code path. */ + SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); + SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); + + /* Optimizing character pairs, if their difference is power of 2. */ + if (is_powerof2(other_cases[1] ^ other_cases[0])) + { + if (charoffset == 0) + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + else + { + OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + } + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + other_cases += 2; + } + else if (is_powerof2(other_cases[2] ^ other_cases[1])) + { + if (charoffset == 0) + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); + else + { + OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + } + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset); + OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + other_cases += 3; + } + else + { + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + } + + while (*other_cases != NOTACHAR) + { + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset); + OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + } + jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + break; + + case PT_UCNC: + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + SET_CHAR_OFFSET(0xa0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + SET_CHAR_OFFSET(0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL); + jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + break; + + case PT_PXGRAPH: + /* C and Z groups are the farthest two groups. */ + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); + + jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + JUMPHERE(jump); + jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPRINT: + /* C and Z groups are the farthest two groups. */ + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); + OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); + + jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + + JUMPHERE(jump); + jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPUNCT: + SET_TYPE_OFFSET(ucp_Sc); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + + SET_CHAR_OFFSET(0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff); + OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + + SET_TYPE_OFFSET(ucp_Pc); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + break; } cc += 2; } #endif if (jump != NULL) - add_jump(compiler, compares > 0 ? list : fallbacks, jump); + add_jump(compiler, compares > 0 ? list : backtracks, jump); } if (found != NULL) @@ -2978,13 +4496,14 @@ if (found != NULL) #endif -static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks) +static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; int length; unsigned int c, oc, bit; compare_context context; struct sljit_jump *jump[4]; +jump_list *end_list; #ifdef SUPPORT_UTF struct sljit_label *label; #ifdef SUPPORT_UCP @@ -2997,83 +4516,99 @@ switch(type) case OP_SOD: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_SOM: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); return cc; case OP_NOT_DIGIT: case OP_DIGIT: - check_input_end(common, fallbacks); - read_char8_type(common); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); - add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + /* Digits are usually 0-9, so it is worth to optimize them. */ + if (common->digits[0] == -2) + get_ctype_ranges(common, ctype_digit, common->digits); + detect_partial_match(common, backtracks); + /* Flip the starting bit in the negative case. */ + if (type == OP_NOT_DIGIT) + common->digits[1] ^= 1; + if (!check_ranges(common, common->digits, backtracks, TRUE)) + { + read_char8_type(common); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); + add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + } + if (type == OP_NOT_DIGIT) + common->digits[1] ^= 1; return cc; case OP_NOT_WHITESPACE: case OP_WHITESPACE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char8_type(common); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); - add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); return cc; case OP_NOT_WORDCHAR: case OP_WORDCHAR: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char8_type(common); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); - add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); return cc; case OP_ANY: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + end_list = NULL; + if (common->mode != JIT_PARTIAL_HARD_COMPILE) + add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); - JUMPHERE(jump[1]); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); + set_jumps(end_list, LABEL()); JUMPHERE(jump[0]); } else - check_newlinechar(common, common->nltype, fallbacks, TRUE); + check_newlinechar(common, common->nltype, backtracks, TRUE); return cc; case OP_ALLANY: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); #ifdef SUPPORT_UTF if (common->utf) { OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 +#if defined COMPILE_PCRE8 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#else /* COMPILE_PCRE8 */ -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#endif /* COMPILE_PCRE16 */ -#endif /* COMPILE_PCRE8 */ +#endif JUMPHERE(jump[0]); +#endif /* COMPILE_PCRE[8|16] */ return cc; } #endif @@ -3081,7 +4616,7 @@ switch(type) return cc; case OP_ANYBYTE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; @@ -3094,81 +4629,118 @@ switch(type) propdata[2] = cc[0]; propdata[3] = cc[1]; propdata[4] = XCL_END; - compile_xclass_hotpath(common, propdata, fallbacks); + compile_xclass_matchingpath(common, propdata, backtracks); return cc + 2; #endif #endif case OP_ANYNL: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + /* We don't need to handle soft partial matching case. */ + end_list = NULL; + if (common->mode != JIT_PARTIAL_HARD_COMPILE) + add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[3] = JUMP(SLJIT_JUMP); + jump[2] = JUMP(SLJIT_JUMP); JUMPHERE(jump[0]); - check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE); + check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); + set_jumps(end_list, LABEL()); JUMPHERE(jump[1]); JUMPHERE(jump[2]); - JUMPHERE(jump[3]); return cc; case OP_NOT_HSPACE: case OP_HSPACE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); return cc; case OP_NOT_VSPACE: case OP_VSPACE: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); return cc; #ifdef SUPPORT_UCP case OP_EXTUNI: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); + /* Optimize register allocation: use a real register. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); + OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); label = LABEL(); jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); read_char(common); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc); - CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); + OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2); + OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); + OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + JUMPTO(SLJIT_C_NOT_ZERO, label); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); JUMPHERE(jump[0]); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); + + if (common->mode == JIT_PARTIAL_HARD_COMPILE) + { + jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); + /* Since we successfully read a char above, partial matching must occure. */ + check_partial(common, TRUE); + JUMPHERE(jump[0]); + } return cc; #endif case OP_EODN: + /* Requires rather complex checks. */ jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else if (common->nltype == NLTYPE_FIXED) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } else { @@ -3177,44 +4749,46 @@ switch(type) OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); jump[2] = JUMP(SLJIT_C_GREATER); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS)); /* Equal. */ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP)); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); JUMPHERE(jump[1]); if (common->nltype == NLTYPE_ANYCRLF) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); } else { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0); read_char(common); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); } JUMPHERE(jump[2]); JUMPHERE(jump[3]); } JUMPHERE(jump[0]); + check_partial(common, FALSE); return cc; case OP_EOD: - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); return cc; case OP_CIRC: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0)); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); return cc; case OP_CIRCM: @@ -3222,25 +4796,25 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else { skip_char_back(common); read_char(common); - check_newlinechar(common, common->nltype, fallbacks, FALSE); + check_newlinechar(common, common->nltype, backtracks, FALSE); } JUMPHERE(jump[0]); return cc; @@ -3248,35 +4822,50 @@ switch(type) case OP_DOLL: OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); if (!common->endonly) - compile_char1_hotpath(common, OP_EODN, cc, fallbacks); + compile_char1_matchingpath(common, OP_EODN, cc, backtracks); else - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); + { + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + } return cc; case OP_DOLLM: jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else { peek_char(common); - check_newlinechar(common, common->nltype, fallbacks, FALSE); + check_newlinechar(common, common->nltype, backtracks, FALSE); } JUMPHERE(jump[0]); return cc; @@ -3287,19 +4876,19 @@ switch(type) #ifdef SUPPORT_UTF if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); #endif - if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) + if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) { OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); context.length = IN_UCHARS(length); context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED context.ucharptr = 0; #endif - return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks); + return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); } - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); #ifdef SUPPORT_UTF if (common->utf) @@ -3309,16 +4898,29 @@ switch(type) else #endif c = *cc; + if (type == OP_CHAR || !char_has_othercase(common, cc)) + { + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + return cc + length; + } + oc = char_othercase(common, c); + bit = c ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + return cc + length; + } OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c)); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); return cc + length; case OP_NOT: case OP_NOTI: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); length = 1; #ifdef SUPPORT_UTF if (common->utf) @@ -3329,17 +4931,17 @@ switch(type) { OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); else { /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); } /* Skip the variable-length character. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(jump[0]); return cc + 1; @@ -3359,28 +4961,31 @@ switch(type) } if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); else { oc = char_othercase(common, c); bit = c ^ oc; - if (ispowerof2(bit)) + if (is_powerof2(bit)) { OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); } else { - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); } } - return cc + 1; + return cc + length; case OP_CLASS: case OP_NCLASS: - check_input_end(common, fallbacks); + detect_partial_match(common, backtracks); read_char(common); + if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks)) + return cc + 32 / sizeof(pcre_uchar); + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 jump[0] = NULL; #ifdef COMPILE_PCRE8 @@ -3392,32 +4997,33 @@ switch(type) jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); if (type == OP_CLASS) { - add_jump(compiler, fallbacks, jump[0]); + add_jump(compiler, backtracks, jump[0]); jump[0] = NULL; } } #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (jump[0] != NULL) JUMPHERE(jump[0]); #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ return cc + 32 / sizeof(pcre_uchar); -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 case OP_XCLASS: - compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks); + compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); return cc + GET(cc, 0) - 1; #endif case OP_REVERSE: length = GET(cc, 0); - SLJIT_ASSERT(length > 0); + if (length == 0) + return cc + LINK_SIZE; OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); #ifdef SUPPORT_UTF if (common->utf) @@ -3425,23 +5031,26 @@ switch(type) OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); label = LABEL(); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); skip_char_back(common); OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, label); - return cc + LINK_SIZE; } + else #endif - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); + } + check_start_used_ptr(common); return cc + LINK_SIZE; } SLJIT_ASSERT_STOP(); return cc; } -static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks) +static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks) { /* This function consumes at least one input character. */ /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ @@ -3493,128 +5102,185 @@ if (context.length > 0) { /* We have a fixed-length byte sequence. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED context.ucharptr = 0; #endif - do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0); + do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); return cc; } /* A non-fixed length character will be checked if length == 0. */ -return compile_char1_hotpath(common, *cc, cc + 1, fallbacks); +return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); } -static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks) -{ -DEFINE_COMPILER; -int offset = GET2(cc, 1) << 1; - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); -if (!common->jscript_compat) - { - if (fallbacks == NULL) - { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); - return JUMP(SLJIT_C_NOT_ZERO); - } - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - } -return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); -} - /* Forward definitions. */ -static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *); -static void compile_fallbackpath(compiler_common *, struct fallback_common *); +static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); +static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); -#define PUSH_FALLBACK(size, ccstart, error) \ +#define PUSH_BACKTRACK(size, ccstart, error) \ do \ { \ - fallback = sljit_alloc_memory(compiler, (size)); \ + backtrack = sljit_alloc_memory(compiler, (size)); \ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ return error; \ - memset(fallback, 0, size); \ - fallback->prev = parent->top; \ - fallback->cc = (ccstart); \ - parent->top = fallback; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ } \ while (0) -#define PUSH_FALLBACK_NOVALUE(size, ccstart) \ +#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ do \ { \ - fallback = sljit_alloc_memory(compiler, (size)); \ + backtrack = sljit_alloc_memory(compiler, (size)); \ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ return; \ - memset(fallback, 0, size); \ - fallback->prev = parent->top; \ - fallback->cc = (ccstart); \ - parent->top = fallback; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ } \ while (0) -#define FALLBACK_AS(type) ((type*)fallback) +#define BACKTRACK_AS(type) ((type *)backtrack) -static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail) +static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) { +/* The OVECTOR offset goes to TMP2. */ DEFINE_COMPILER; -int offset = GET2(cc, 1) << 1; +int count = GET2(cc, 1 + IMM2_SIZE); +pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size; +unsigned int offset; +jump_list *found = NULL; + +SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); + +count--; +while (count-- > 0) + { + offset = GET2(slot, 0) << 1; + GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); + add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0)); + slot += common->name_entry_size; + } + +offset = GET2(slot, 0) << 1; +GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); +if (backtracks != NULL && !common->jscript_compat) + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0)); + +set_jumps(found, LABEL()); +} + +static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) +{ +DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +int offset = 0; struct sljit_jump *jump = NULL; +struct sljit_jump *partial; +struct sljit_jump *nopartial; -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); -if (withchecks && !common->jscript_compat) - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); +if (ref) + { + offset = GET2(cc, 1) << 1; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); + /* OVECTOR(1) contains the "string begin - 1" constant. */ + if (withchecks && !common->jscript_compat) + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + } +else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); #if defined SUPPORT_UTF && defined SUPPORT_UCP if (common->utf && *cc == OP_REFI) { - SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3); + if (ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + if (withchecks) jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0); /* Needed to save important temporary registers. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); + else + { + add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(nopartial); + } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); } else #endif /* SUPPORT_UTF && SUPPORT_UCP */ { - OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); + if (ref) + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); + else + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); + if (withchecks) jump = JUMP(SLJIT_C_ZERO); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, partial); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + + if (common->mode != JIT_COMPILE) + { + nopartial = JUMP(SLJIT_JUMP); + JUMPHERE(partial); + /* TMP2 -= STR_END - STR_PTR */ + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); + partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + JUMPHERE(partial); + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(nopartial); + } } if (jump != NULL) { if (emptyfail) - add_jump(compiler, fallbacks, jump); + add_jump(compiler, backtracks, jump); else JUMPHERE(jump); } -return cc + 1 + IMM2_SIZE; } -static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +backtrack_common *backtrack; pcre_uchar type; +int offset = 0; struct sljit_label *label; struct sljit_jump *zerolength; struct sljit_jump *jump = NULL; @@ -3622,9 +5288,15 @@ pcre_uchar *ccbegin = cc; int min = 0, max = 0; BOOL minimize; -PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); +if (ref) + offset = GET2(cc, 1) << 1; +else + cc += IMM2_SIZE; type = cc[1 + IMM2_SIZE]; + +SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); minimize = (type & 0x1) != 0; switch(type) { @@ -3662,26 +5334,53 @@ if (!minimize) if (min == 0) { allocate_stack(common, 2); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); /* Temporary release of STR_PTR. */ - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); - zerolength = compile_ref_checks(common, ccbegin, NULL); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + /* Handles both invalid and empty cases. Since the minimum repeat, + is zero the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } /* Restore if not zero length. */ - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); } else { allocate_stack(common, 1); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks); + if (ref) + { + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } } if (min > 1 || max > 1) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); label = LABEL(); - compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE); + if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); + compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); if (min > 1 || max > 1) { @@ -3709,31 +5408,59 @@ if (!minimize) } JUMPHERE(zerolength); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); - decrease_call_count(common); + count_match(common); return cc; } -allocate_stack(common, 2); +allocate_stack(common, ref ? 2 : 3); +if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); if (type != OP_CRMINSTAR) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); if (min == 0) { - zerolength = compile_ref_checks(common, ccbegin, NULL); + /* Handles both invalid and empty cases. Since the minimum repeat, + is zero the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + /* Length is non-zero, we can match real repeats. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); jump = JUMP(SLJIT_JUMP); } else - zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks); + { + if (ref) + { + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + } -FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); +BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); if (max > 0) - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); -compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE); +if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); +compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (min > 1) @@ -3741,7 +5468,7 @@ if (min > 1) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath); } else if (max > 0) OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); @@ -3750,19 +5477,31 @@ if (jump != NULL) JUMPHERE(jump); JUMPHERE(zerolength); -decrease_call_count(common); +count_match(common); return cc; } -static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; recurse_entry *entry = common->entries; recurse_entry *prev = NULL; -int start = GET(cc, 1); +sljit_sw start = GET(cc, 1); +pcre_uchar *start_cc; +BOOL needs_control_head; -PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); + +/* Inlining simple patterns. */ +if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack) + { + start_cc = common->start + start; + compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack); + BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE; + return cc + 1 + LINK_SIZE; + } + while (entry != NULL) { if (entry->start == start) @@ -3787,48 +5526,169 @@ if (entry == NULL) common->entries = entry; } -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); -allocate_stack(common, 1); -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); +if (common->has_set_som && common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + allocate_stack(common, 2); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + } +else if (common->has_set_som || common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } if (entry->entry == NULL) add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL)); else JUMPTO(SLJIT_FAST_CALL, entry->entry); /* Leave if the match is failed. */ -add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); return cc + 1 + LINK_SIZE; } -static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional) +static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector) { +const pcre_uchar *begin = arguments->begin; +int *offset_vector = arguments->offsets; +int offset_count = arguments->offset_count; +int i; + +if (PUBL(callout) == NULL) + return 0; + +callout_block->version = 2; +callout_block->callout_data = arguments->callout_data; + +/* Offsets in subject. */ +callout_block->subject_length = arguments->end - arguments->begin; +callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin; +callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin; +#if defined COMPILE_PCRE8 +callout_block->subject = (PCRE_SPTR)begin; +#elif defined COMPILE_PCRE16 +callout_block->subject = (PCRE_SPTR16)begin; +#elif defined COMPILE_PCRE32 +callout_block->subject = (PCRE_SPTR32)begin; +#endif + +/* Convert and copy the JIT offset vector to the offset_vector array. */ +callout_block->capture_top = 0; +callout_block->offset_vector = offset_vector; +for (i = 2; i < offset_count; i += 2) + { + offset_vector[i] = jit_ovector[i] - begin; + offset_vector[i + 1] = jit_ovector[i + 1] - begin; + if (jit_ovector[i] >= begin) + callout_block->capture_top = i; + } + +callout_block->capture_top = (callout_block->capture_top >> 1) + 1; +if (offset_count > 0) + offset_vector[0] = -1; +if (offset_count > 1) + offset_vector[1] = -1; +return (*PUBL(callout))(callout_block); +} + +/* Aligning to 8 byte. */ +#define CALLOUT_ARG_SIZE \ + (((int)sizeof(PUBL(callout_block)) + 7) & ~7) + +#define CALLOUT_ARG_OFFSET(arg) \ + (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg)) + +static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) +{ DEFINE_COMPILER; +backtrack_common *backtrack; + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); +OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +SLJIT_ASSERT(common->capture_last_ptr != 0); +OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); +OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); + +/* These pointer sized fields temporarly stores internal variables. */ +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); + +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); +OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); +OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); + +/* Needed to save important temporary registers. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); +OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE); +GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START); +sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); +OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); + +/* Check return value. */ +OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER)); +if (common->forced_quit_label == NULL) + add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS)); +else + JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label); +return cc + 2 + 2 * LINK_SIZE; +} + +#undef CALLOUT_ARG_SIZE +#undef CALLOUT_ARG_OFFSET + +static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional) +{ +DEFINE_COMPILER; int framesize; -int localptr; -fallback_common altfallback; +int extrasize; +BOOL needs_control_head; +int private_data_ptr; +backtrack_common altbacktrack; pcre_uchar *ccbegin; pcre_uchar opcode; pcre_uchar bra = OP_BRA; jump_list *tmp = NULL; -jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks; +jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; jump_list **found; /* Saving previous accept variables. */ -struct sljit_label *save_acceptlabel = common->acceptlabel; +BOOL save_local_exit = common->local_exit; +BOOL save_positive_assert = common->positive_assert; +then_trap_backtrack *save_then_trap = common->then_trap; +struct sljit_label *save_quit_label = common->quit_label; +struct sljit_label *save_accept_label = common->accept_label; +jump_list *save_quit = common->quit; +jump_list *save_positive_assert_quit = common->positive_assert_quit; +jump_list *save_accept = common->accept; struct sljit_jump *jump; struct sljit_jump *brajump = NULL; -jump_list *save_accept = common->accept; +/* Assert captures then. */ +common->then_trap = NULL; + if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { SLJIT_ASSERT(!conditional); bra = *cc; cc++; } -localptr = PRIV_DATA(cc); -SLJIT_ASSERT(localptr != 0); -framesize = get_framesize(common, cc, FALSE); -fallback->framesize = framesize; -fallback->localptr = localptr; +private_data_ptr = PRIVATE_DATA(cc); +SLJIT_ASSERT(private_data_ptr != 0); +framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); +backtrack->framesize = framesize; +backtrack->private_data_ptr = private_data_ptr; opcode = *cc; SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; @@ -3837,7 +5697,7 @@ cc += GET(cc, 1); if (bra == OP_BRAMINZERO) { - /* This is a braminzero fallback path. */ + /* This is a braminzero backtrack path. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); @@ -3845,94 +5705,155 @@ if (bra == OP_BRAMINZERO) if (framesize < 0) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0); - allocate_stack(common, 1); + extrasize = needs_control_head ? 2 : 1; + if (framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); + allocate_stack(common, extrasize); + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + } } else { - allocate_stack(common, framesize + 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0); + extrasize = needs_control_head ? 3 : 2; + allocate_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - init_frame(common, ccbegin, framesize + 1, 2, FALSE); + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + } + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE); } -memset(&altfallback, 0, sizeof(fallback_common)); +memset(&altbacktrack, 0, sizeof(backtrack_common)); +if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + /* Negative assert is stronger than positive assert. */ + common->local_exit = TRUE; + common->quit_label = NULL; + common->quit = NULL; + common->positive_assert = FALSE; + } +else + common->positive_assert = TRUE; +common->positive_assert_quit = NULL; + while (1) { - common->acceptlabel = NULL; + common->accept_label = NULL; common->accept = NULL; - altfallback.top = NULL; - altfallback.topfallbacks = NULL; + altbacktrack.top = NULL; + altbacktrack.topbacktracks = NULL; if (*ccbegin == OP_ALT) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - altfallback.cc = ccbegin; - compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback); + altbacktrack.cc = ccbegin; + compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { - common->acceptlabel = save_acceptlabel; + if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + common->local_exit = save_local_exit; + common->quit_label = save_quit_label; + common->quit = save_quit; + } + common->positive_assert = save_positive_assert; + common->then_trap = save_then_trap; + common->accept_label = save_accept_label; + common->positive_assert_quit = save_positive_assert_quit; common->accept = save_accept; return NULL; } - common->acceptlabel = LABEL(); + common->accept_label = LABEL(); if (common->accept != NULL) - set_jumps(common->accept, common->acceptlabel); + set_jumps(common->accept, common->accept_label); /* Reset stack. */ if (framesize < 0) - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - else { + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + else + free_stack(common, extrasize); + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); + } + else + { if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) { - /* We don't need to keep the STR_PTR, only the previous localptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w)); + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); } else { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw)); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); } - } + } if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) { /* We know that STR_PTR was stored on the top of the stack. */ if (conditional) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0); else if (bra == OP_BRAZERO) { if (framesize < 0) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); } - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else if (framesize >= 0) { /* For OP_BRA and OP_BRAMINZERO. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); } } add_jump(compiler, found, JUMP(SLJIT_JUMP)); - compile_fallbackpath(common, altfallback.top); + compile_backtrackingpath(common, altbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { - common->acceptlabel = save_acceptlabel; + if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + common->local_exit = save_local_exit; + common->quit_label = save_quit_label; + common->quit = save_quit; + } + common->positive_assert = save_positive_assert; + common->then_trap = save_then_trap; + common->accept_label = save_accept_label; + common->positive_assert_quit = save_positive_assert_quit; common->accept = save_accept; return NULL; } - set_jumps(altfallback.topfallbacks, LABEL()); + set_jumps(altbacktrack.topbacktracks, LABEL()); if (*cc != OP_ALT) break; @@ -3940,8 +5861,34 @@ while (1) ccbegin = cc; cc += GET(cc, 1); } + +if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + SLJIT_ASSERT(common->positive_assert_quit == NULL); + /* Makes the check less complicated below. */ + common->positive_assert_quit = common->quit; + } + /* None of them matched. */ +if (common->positive_assert_quit != NULL) + { + jump = JUMP(SLJIT_JUMP); + set_jumps(common->positive_assert_quit, LABEL()); + SLJIT_ASSERT(framesize != no_stack); + if (framesize < 0) + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); + else + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); + } + JUMPHERE(jump); + } +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); + if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { /* Assert is failed. */ @@ -3952,22 +5899,26 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { /* The topmost item should be 0. */ if (bra == OP_BRAZERO) + { + if (extrasize == 2) + free_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } else - free_stack(common, 1); + free_stack(common, extrasize); } else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); /* The topmost item should be 0. */ if (bra == OP_BRAZERO) { - free_stack(common, framesize + 1); + free_stack(common, framesize + extrasize - 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else - free_stack(common, framesize + 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0); + free_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); } jump = JUMP(SLJIT_JUMP); if (bra != OP_BRAZERO) @@ -3978,13 +5929,17 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) if (framesize < 0) { /* We know that STR_PTR was stored on the top of the stack. */ - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); /* Keep the STR_PTR on the top of the stack. */ if (bra == OP_BRAZERO) - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); + { + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + if (extrasize == 2) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } else if (bra == OP_BRAMINZERO) { - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } } @@ -3992,35 +5947,44 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { if (bra == OP_BRA) { - /* We don't need to keep the STR_PTR, only the previous localptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw)); } else { - /* We don't need to keep the STR_PTR, only the previous localptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); + if (extrasize == 2) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (bra == OP_BRAMINZERO) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); + } } } if (bra == OP_BRAZERO) { - fallback->hotpath = LABEL(); - sljit_set_label(jump, fallback->hotpath); + backtrack->matchingpath = LABEL(); + SET_LABEL(jump, backtrack->matchingpath); } else if (bra == OP_BRAMINZERO) { - JUMPTO(SLJIT_JUMP, fallback->hotpath); + JUMPTO(SLJIT_JUMP, backtrack->matchingpath); JUMPHERE(brajump); if (framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); } - set_jumps(fallback->common.topfallbacks, LABEL()); + set_jumps(backtrack->common.topbacktracks, LABEL()); } } else @@ -4030,153 +5994,128 @@ else { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); if (bra != OP_BRA) + { + if (extrasize == 2) + free_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } else - free_stack(common, 1); + free_stack(common, extrasize); } else { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); /* The topmost item should be 0. */ if (bra != OP_BRA) { - free_stack(common, framesize + 1); + free_stack(common, framesize + extrasize - 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else - free_stack(common, framesize + 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0); + free_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); } if (bra == OP_BRAZERO) - fallback->hotpath = LABEL(); + backtrack->matchingpath = LABEL(); else if (bra == OP_BRAMINZERO) { - JUMPTO(SLJIT_JUMP, fallback->hotpath); + JUMPTO(SLJIT_JUMP, backtrack->matchingpath); JUMPHERE(brajump); } if (bra != OP_BRA) { - SLJIT_ASSERT(found == &fallback->common.topfallbacks); - set_jumps(fallback->common.topfallbacks, LABEL()); - fallback->common.topfallbacks = NULL; + SLJIT_ASSERT(found == &backtrack->common.topbacktracks); + set_jumps(backtrack->common.topbacktracks, LABEL()); + backtrack->common.topbacktracks = NULL; } } -common->acceptlabel = save_acceptlabel; +if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + common->local_exit = save_local_exit; + common->quit_label = save_quit_label; + common->quit = save_quit; + } +common->positive_assert = save_positive_assert; +common->then_trap = save_then_trap; +common->accept_label = save_accept_label; +common->positive_assert_quit = save_positive_assert_quit; common->accept = save_accept; return cc + 1 + LINK_SIZE; } -static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table) +static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) { -int condition = FALSE; -pcre_uchar *slotA = name_table; -pcre_uchar *slotB; -sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)]; -sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)]; -sljit_w no_capture; -int i; +DEFINE_COMPILER; +int stacksize; -locals += OVECTOR_START / sizeof(sljit_w); -no_capture = locals[1]; - -for (i = 0; i < name_count; i++) +if (framesize < 0) { - if (GET2(slotA, 0) == refno) break; - slotA += name_entry_size; - } + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + else + { + stacksize = needs_control_head ? 1 : 0; + if (ket != OP_KET || has_alternatives) + stacksize++; + free_stack(common, stacksize); + } -if (i < name_count) - { - /* Found a name for the number - there can be only one; duplicate names - for different numbers are allowed, but not vice versa. First scan down - for duplicates. */ + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0); - slotB = slotA; - while (slotB > name_table) + /* TMP2 which is set here used by OP_KETRMAX below. */ + if (ket == OP_KETRMAX) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); + else if (ket == OP_KETRMIN) { - slotB -= name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = locals[GET2(slotB, 0) << 1] != no_capture; - if (condition) break; - } - else break; + /* Move the STR_PTR to the private_data_ptr. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); } + } +else + { + stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0); - /* Scan up for duplicates */ - if (!condition) + if (ket == OP_KETRMAX) { - slotB = slotA; - for (i++; i < name_count; i++) - { - slotB += name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = locals[GET2(slotB, 0) << 1] != no_capture; - if (condition) break; - } - else break; - } + /* TMP2 which is set here used by OP_KETRMAX below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } } -return condition; +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0); } -static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table) +static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) { -int condition = FALSE; -pcre_uchar *slotA = name_table; -pcre_uchar *slotB; -sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)]; -sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)]; -sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)]; -int i; +DEFINE_COMPILER; -for (i = 0; i < name_count; i++) +if (common->capture_last_ptr != 0) { - if (GET2(slotA, 0) == recno) break; - slotA += name_entry_size; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + stacksize++; } - -if (i < name_count) +if (common->optimized_cbracket[offset >> 1] == 0) { - /* Found a name for the number - there can be only one; duplicate - names for different numbers are allowed, but not vice versa. First - scan down for duplicates. */ - - slotB = slotA; - while (slotB > name_table) - { - slotB -= name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == group_num; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - if (!condition) - { - slotB = slotA; - for (i++; i < name_count; i++) - { - slotB += name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == group_num; - if (condition) break; - } - else break; - } - } + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + stacksize += 2; } -return condition; +return stacksize; } /* @@ -4233,26 +6172,30 @@ return condition; Or nothing, if trace is unnecessary */ -static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar opcode; -int localptr = 0; +int private_data_ptr = 0; int offset = 0; -int stacksize; +int i, stacksize; +int repeat_ptr = 0, repeat_length = 0; +int repeat_type = 0, repeat_count = 0; pcre_uchar *ccbegin; -pcre_uchar *hotpath; +pcre_uchar *matchingpath; +pcre_uchar *slot; pcre_uchar bra = OP_BRA; pcre_uchar ket; -assert_fallback *assert; +assert_backtrack *assert; BOOL has_alternatives; +BOOL needs_control_head = FALSE; struct sljit_jump *jump; struct sljit_jump *skip; -struct sljit_label *rmaxlabel = NULL; -struct sljit_jump *braminzerojump = NULL; +struct sljit_label *rmax_label = NULL; +struct sljit_jump *braminzero = NULL; -PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -4263,35 +6206,36 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) opcode = *cc; ccbegin = cc; -hotpath = ccbegin + 1 + LINK_SIZE; +matchingpath = bracketend(cc) - 1 - LINK_SIZE; +ket = *matchingpath; +if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0) + { + repeat_ptr = PRIVATE_DATA(matchingpath); + repeat_length = PRIVATE_DATA(matchingpath + 1); + repeat_type = PRIVATE_DATA(matchingpath + 2); + repeat_count = PRIVATE_DATA(matchingpath + 3); + SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0); + if (repeat_type == OP_UPTO) + ket = OP_KETRMAX; + if (repeat_type == OP_MINUPTO) + ket = OP_KETRMIN; + } if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF) { - /* Drop this bracket_fallback. */ - parent->top = fallback->prev; - return bracketend(cc); + /* Drop this bracket_backtrack. */ + parent->top = backtrack->prev; + return matchingpath + 1 + LINK_SIZE + repeat_length; } -ket = *(bracketend(cc) - 1 - LINK_SIZE); +matchingpath = ccbegin + 1 + LINK_SIZE; SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN); SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX))); cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; -if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - { - has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE; - if (*hotpath == OP_NRREF) - { - stacksize = GET2(hotpath, 1); - if (common->currententry == NULL || stacksize == RREF_ANY) - has_alternatives = FALSE; - else if (common->currententry->start == 0) - has_alternatives = stacksize != 0; - else - has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - } - } +if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) + has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) opcode = OP_SCOND; @@ -4302,24 +6246,32 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) { /* Capturing brackets has a pre-allocated space. */ offset = GET2(ccbegin, 1 + LINK_SIZE); - localptr = OVECTOR_PRIV(offset); - offset <<= 1; - FALLBACK_AS(bracket_fallback)->localptr = localptr; - hotpath += IMM2_SIZE; + if (common->optimized_cbracket[offset] == 0) + { + private_data_ptr = OVECTOR_PRIV(offset); + offset <<= 1; + } + else + { + offset <<= 1; + private_data_ptr = OVECTOR(offset); + } + BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; + matchingpath += IMM2_SIZE; } else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) { /* Other brackets simply allocate the next entry. */ - localptr = PRIV_DATA(ccbegin); - SLJIT_ASSERT(localptr != 0); - FALLBACK_AS(bracket_fallback)->localptr = localptr; + private_data_ptr = PRIVATE_DATA(ccbegin); + SLJIT_ASSERT(private_data_ptr != 0); + BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; if (opcode == OP_ONCE) - FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE); + BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head); } /* Instructions before the first alternative. */ stacksize = 0; -if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) +if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) stacksize++; if (bra == OP_BRAZERO) stacksize++; @@ -4328,7 +6280,7 @@ if (stacksize > 0) allocate_stack(common, stacksize); stacksize = 0; -if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) +if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); stacksize++; @@ -4339,12 +6291,12 @@ if (bra == OP_BRAZERO) if (bra == OP_BRAMINZERO) { - /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */ + /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); if (ket != OP_KETRMIN) { free_stack(common, 1); - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); } else { @@ -4356,16 +6308,16 @@ if (bra == OP_BRAMINZERO) skip = JUMP(SLJIT_JUMP); JUMPHERE(jump); /* Checking zero-length iteration. */ - if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0) + if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) { - /* When we come from outside, localptr contains the previous STR_PTR. */ - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ + braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); } else { /* Except when the whole stack frame must be saved. */ - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw)); } JUMPHERE(skip); } @@ -4378,80 +6330,129 @@ if (bra == OP_BRAMINZERO) } } +if (repeat_type != 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count); + if (repeat_type == OP_EXACT) + rmax_label = LABEL(); + } + if (ket == OP_KETRMIN) - FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); if (ket == OP_KETRMAX) { - rmaxlabel = LABEL(); - if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA) - FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel; + rmax_label = LABEL(); + if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label; } /* Handling capturing brackets and alternatives. */ if (opcode == OP_ONCE) { - if (FALLBACK_AS(bracket_fallback)->u.framesize < 0) + stacksize = 0; + if (needs_control_head) { - /* Neither capturing brackets nor recursions are not found in the block. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + stacksize++; + } + + if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) + { + /* Neither capturing brackets nor recursions are found in the block. */ if (ket == OP_KETRMIN) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); + stacksize += 2; + if (!needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); } - else if (ket == OP_KETRMAX || has_alternatives) + else { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0); - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); + if (ket == OP_KETRMAX || has_alternatives) + stacksize++; } - else - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0); + + if (stacksize > 0) + allocate_stack(common, stacksize); + + stacksize = 0; + if (needs_control_head) + { + stacksize++; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + + if (ket == OP_KETRMIN) + { + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); + } + else if (ket == OP_KETRMAX || has_alternatives) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); } else { - if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) + if (ket != OP_KET || has_alternatives) + stacksize++; + + stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1; + allocate_stack(common, stacksize); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + + stacksize = needs_control_head ? 1 : 0; + if (ket != OP_KET || has_alternatives) { - allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); + stacksize++; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); } else { - allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); } + init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE); } } else if (opcode == OP_CBRA || opcode == OP_SCBRA) { /* Saving the previous values. */ - allocate_stack(common, 3); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); + if (common->optimized_cbracket[offset >> 1] != 0) + { + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); + allocate_stack(common, 2); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } } else if (opcode == OP_SBRA || opcode == OP_SCOND) { /* Saving the previous value. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } else if (has_alternatives) @@ -4464,128 +6465,126 @@ else if (has_alternatives) /* Generating code for the first alternative. */ if (opcode == OP_COND || opcode == OP_SCOND) { - if (*hotpath == OP_CREF) + if (*matchingpath == OP_CREF) { SLJIT_ASSERT(has_alternatives); - add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), - CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - hotpath += 1 + IMM2_SIZE; + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), + CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + matchingpath += 1 + IMM2_SIZE; } - else if (*hotpath == OP_NCREF) + else if (*matchingpath == OP_DNCREF) { SLJIT_ASSERT(has_alternatives); - stacksize = GET2(hotpath, 1); - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); - - JUMPHERE(jump); - hotpath += 1 + IMM2_SIZE; + i = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + slot += common->name_entry_size; + i--; + while (i-- > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0); + slot += common->name_entry_size; + } + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO)); + matchingpath += 1 + 2 * IMM2_SIZE; } - else if (*hotpath == OP_RREF || *hotpath == OP_NRREF) + else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) { /* Never has other case. */ - FALLBACK_AS(bracket_fallback)->u.condfailed = NULL; + BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; + SLJIT_ASSERT(!has_alternatives); - stacksize = GET2(hotpath, 1); - if (common->currententry == NULL) - stacksize = 0; - else if (stacksize == RREF_ANY) - stacksize = 1; - else if (common->currententry->start == 0) - stacksize = stacksize == 0; - else - stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - - if (*hotpath == OP_RREF || stacksize || common->currententry == NULL) + if (*matchingpath == OP_RREF) { - SLJIT_ASSERT(!has_alternatives); + stacksize = GET2(matchingpath, 1); + if (common->currententry == NULL) + stacksize = 0; + else if (stacksize == RREF_ANY) + stacksize = 1; + else if (common->currententry->start == 0) + stacksize = stacksize == 0; + else + stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + if (stacksize != 0) - hotpath += 1 + IMM2_SIZE; + matchingpath += 1 + IMM2_SIZE; + } + else + { + if (common->currententry == NULL || common->currententry->start == 0) + stacksize = 0; else { + stacksize = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + while (stacksize > 0) + { + if ((int)GET2(slot, 0) == i) + break; + slot += common->name_entry_size; + stacksize--; + } + } + + if (stacksize != 0) + matchingpath += 1 + 2 * IMM2_SIZE; + } + + /* The stacksize == 0 is a common "else" case. */ + if (stacksize == 0) + { if (*cc == OP_ALT) { - hotpath = cc + 1 + LINK_SIZE; + matchingpath = cc + 1 + LINK_SIZE; cc += GET(cc, 1); } else - hotpath = cc; + matchingpath = cc; } - } - else - { - SLJIT_ASSERT(has_alternatives); - - stacksize = GET2(hotpath, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE)); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0); - OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); - hotpath += 1 + IMM2_SIZE; - } } else { - SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT); - /* Similar code as PUSH_FALLBACK macro. */ - assert = sljit_alloc_memory(compiler, sizeof(assert_fallback)); + SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT); + /* Similar code as PUSH_BACKTRACK macro. */ + assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; - memset(assert, 0, sizeof(assert_fallback)); - assert->common.cc = hotpath; - FALLBACK_AS(bracket_fallback)->u.assert = assert; - hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE); + memset(assert, 0, sizeof(assert_backtrack)); + assert->common.cc = matchingpath; + BACKTRACK_AS(bracket_backtrack)->u.assert = assert; + matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE); } } -compile_hotpath(common, hotpath, cc, fallback); +compile_matchingpath(common, matchingpath, cc, backtrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; if (opcode == OP_ONCE) - { - if (FALLBACK_AS(bracket_fallback)->u.framesize < 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - /* TMP2 which is set here used by OP_KETRMAX below. */ - if (ket == OP_KETRMAX) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); - else if (ket == OP_KETRMIN) - { - /* Move the STR_PTR to the localptr. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0); - } - } - else - { - stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1; - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w)); - if (ket == OP_KETRMAX) - { - /* TMP2 which is set here used by OP_KETRMAX below. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - } - } + match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); stacksize = 0; +if (repeat_type == OP_MINUPTO) + { + /* We need to preserve the counter. TMP2 will be used below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr); + stacksize++; + } if (ket != OP_KET || bra != OP_BRA) stacksize++; +if (offset != 0) + { + if (common->capture_last_ptr != 0) + stacksize++; + if (common->optimized_cbracket[offset >> 1] == 0) + stacksize += 2; + } if (has_alternatives && opcode != OP_ONCE) stacksize++; @@ -4593,102 +6592,142 @@ if (stacksize > 0) allocate_stack(common, stacksize); stacksize = 0; -if (ket != OP_KET) +if (repeat_type == OP_MINUPTO) { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + /* TMP2 was set above. */ + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); stacksize++; } -else if (bra != OP_BRA) + +if (ket != OP_KET || bra != OP_BRA) { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + if (ket != OP_KET) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); stacksize++; } +if (offset != 0) + stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); + if (has_alternatives) { if (opcode != OP_ONCE) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); if (ket != OP_KETRMAX) - FALLBACK_AS(bracket_fallback)->althotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); } -/* Must be after the hotpath label. */ -if (offset != 0) +/* Must be after the matchingpath label. */ +if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0); } if (ket == OP_KETRMAX) { - if (opcode == OP_ONCE || opcode >= OP_SBRA) + if (repeat_type != 0) { if (has_alternatives) - FALLBACK_AS(bracket_fallback)->althotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, rmax_label); + /* Drop STR_PTR for greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else if (opcode == OP_ONCE || opcode >= OP_SBRA) + { + if (has_alternatives) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); /* Checking zero-length iteration. */ if (opcode != OP_ONCE) - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel); + { + CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label); + /* Drop STR_PTR for greedy plus quantifier. */ + if (bra != OP_BRAZERO) + free_stack(common, 1); + } else /* TMP2 must contain the starting STR_PTR. */ - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel); + CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); } else - JUMPTO(SLJIT_JUMP, rmaxlabel); - FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL(); + JUMPTO(SLJIT_JUMP, rmax_label); + BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); } +if (repeat_type == OP_EXACT) + { + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, rmax_label); + } +else if (repeat_type == OP_UPTO) + { + /* We need to preserve the counter. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + if (bra == OP_BRAZERO) - FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL(); + BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL(); if (bra == OP_BRAMINZERO) { - /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */ - JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath); - if (braminzerojump != NULL) + /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */ + JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath); + if (braminzero != NULL) { - JUMPHERE(braminzerojump); + JUMPHERE(braminzero); /* We need to release the end pointer to perform the - fallback for the zero-length iteration. When + backtrack for the zero-length iteration. When framesize is < 0, OP_ONCE will do the release itself. */ - if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0) + if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); } else if (ket == OP_KETRMIN && opcode != OP_ONCE) free_stack(common, 1); } - /* Continue to the normal fallback. */ + /* Continue to the normal backtrack. */ } if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) - decrease_call_count(common); + count_match(common); /* Skip the other alternatives. */ while (*cc == OP_ALT) cc += GET(cc, 1); cc += 1 + LINK_SIZE; -return cc; + +/* Temporarily encoding the needs_control_head in framesize. */ +if (opcode == OP_ONCE) + BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); +return cc + repeat_length; } -static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar opcode; -int localptr; +int private_data_ptr; int cbraprivptr = 0; +BOOL needs_control_head; int framesize; int stacksize; int offset = 0; BOOL zero = FALSE; pcre_uchar *ccbegin = NULL; -int stack; +int stack; /* Also contains the offset of control head. */ struct sljit_label *loop = NULL; struct jump_list *emptymatch = NULL; -PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); if (*cc == OP_BRAPOSZERO) { zero = TRUE; @@ -4696,9 +6735,9 @@ if (*cc == OP_BRAPOSZERO) } opcode = *cc; -localptr = PRIV_DATA(cc); -SLJIT_ASSERT(localptr != 0); -FALLBACK_AS(bracketpos_fallback)->localptr = localptr; +private_data_ptr = PRIVATE_DATA(cc); +SLJIT_ASSERT(private_data_ptr != 0); +BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; switch(opcode) { case OP_BRAPOS: @@ -4709,6 +6748,9 @@ switch(opcode) case OP_CBRAPOS: case OP_SCBRAPOS: offset = GET2(cc, 1 + LINK_SIZE); + /* This case cannot be optimized in the same was as + normal capturing brackets. */ + SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); cbraprivptr = OVECTOR_PRIV(offset); offset <<= 1; ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; @@ -4719,81 +6761,129 @@ switch(opcode) break; } -framesize = get_framesize(common, cc, FALSE); -FALLBACK_AS(bracketpos_fallback)->framesize = framesize; +framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); +BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; if (framesize < 0) { - stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1; + if (offset != 0) + { + stacksize = 2; + if (common->capture_last_ptr != 0) + stacksize++; + } + else + stacksize = 1; + + if (needs_control_head) + stacksize++; if (!zero) stacksize++; - FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize; + + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; allocate_stack(common, stacksize); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0); + if (framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) + stack = 0; + if (offset != 0) { + stack = 2; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + if (common->capture_last_ptr != 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); + stack = 3; + } } else + { + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + stack = 1; + } + if (needs_control_head) + stack++; if (!zero) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); + if (needs_control_head) + { + stack--; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); + } } else { stacksize = framesize + 1; if (!zero) stacksize++; - if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS) + if (needs_control_head) stacksize++; - FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize; + if (offset == 0) + stacksize++; + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; + allocate_stack(common, stacksize); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0); stack = 0; if (!zero) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); + stack = 1; + } + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); stack++; } - if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS) + if (offset == 0) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); stack++; } OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); - init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE); + init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE); + stack -= 1 + (offset == 0); } -if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) +if (offset != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); loop = LABEL(); while (*cc != OP_KETRPOS) { - fallback->top = NULL; - fallback->topfallbacks = NULL; + backtrack->top = NULL; + backtrack->topbacktracks = NULL; cc += GET(cc, 1); - compile_hotpath(common, ccbegin, cc, fallback); + compile_matchingpath(common, ccbegin, cc, backtrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; if (framesize < 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) + if (offset != 0) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); } else @@ -4811,21 +6901,23 @@ while (*cc != OP_KETRPOS) } else { - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) + if (offset != 0) { - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); } else { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); if (opcode == OP_SBRAPOS) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w)); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); } if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) @@ -4839,34 +6931,38 @@ while (*cc != OP_KETRPOS) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } } + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + JUMPTO(SLJIT_JUMP, loop); flush_stubs(common); - compile_fallbackpath(common, fallback->top); + compile_backtrackingpath(common, backtrack->top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; - set_jumps(fallback->topfallbacks, LABEL()); + set_jumps(backtrack->topbacktracks, LABEL()); if (framesize < 0) { - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) + if (offset != 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } else { - if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) + if (offset != 0) { /* Last alternative. */ if (*cc == OP_KETRPOS) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); } else { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); } } @@ -4875,22 +6971,24 @@ while (*cc != OP_KETRPOS) ccbegin = cc + 1 + LINK_SIZE; } -fallback->topfallbacks = NULL; +/* We don't have to restore the control head in case of a failed match. */ + +backtrack->topbacktracks = NULL; if (!zero) { if (framesize < 0) - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); - else /* TMP2 is set to [localptr] above. */ - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + else /* TMP2 is set to [private_data_ptr] above. */ + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); } /* None of them matched. */ set_jumps(emptymatch, LABEL()); -decrease_call_count(common); +count_match(common); return cc + 1 + LINK_SIZE; } -static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end) +static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end) { int class_len; @@ -4926,7 +7024,7 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEP } else { - SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS); + SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); *type = *opcode; cc++; class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); @@ -4937,18 +7035,24 @@ else if (end != NULL) *end = cc + class_len; } + else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) + { + *opcode -= OP_CRPOSSTAR - OP_POSSTAR; + if (end != NULL) + *end = cc + class_len; + } else { - SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE); - *arg1 = GET2(cc, (class_len + IMM2_SIZE)); - *arg2 = GET2(cc, class_len); + SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); + *max = GET2(cc, (class_len + IMM2_SIZE)); + *min = GET2(cc, class_len); - if (*arg2 == 0) + if (*min == 0) { - SLJIT_ASSERT(*arg1 != 0); - *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO; + SLJIT_ASSERT(*max != 0); + *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO); } - if (*arg1 == *arg2) + if (*max == *min) *opcode = OP_EXACT; if (end != NULL) @@ -4959,7 +7063,7 @@ else if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) { - *arg1 = GET2(cc, 0); + *max = GET2(cc, 0); cc += IMM2_SIZE; } @@ -4982,22 +7086,66 @@ if (end != NULL) return cc; } -static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; pcre_uchar opcode; pcre_uchar type; -int arg1 = -1, arg2 = -1; +int max = -1, min = -1; pcre_uchar* end; jump_list *nomatch = NULL; struct sljit_jump *jump = NULL; struct sljit_label *label; +int private_data_ptr = PRIVATE_DATA(cc); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); +int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int tmp_base, tmp_offset; -PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); -cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end); +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end); +switch(type) + { + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_CLASS: + case OP_NCLASS: + tmp_base = TMP3; + tmp_offset = 0; + break; + + default: + SLJIT_ASSERT_STOP(); + /* Fall through. */ + + case OP_EXTUNI: + case OP_XCLASS: + case OP_NOTPROP: + case OP_PROP: + tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG); + tmp_offset = POSSESSIVE0; + break; + } + switch(opcode) { case OP_STAR: @@ -5006,6 +7154,7 @@ switch(opcode) case OP_CRRANGE: if (type == OP_ANYNL || type == OP_EXTUNI) { + SLJIT_ASSERT(private_data_ptr == 0); if (opcode == OP_STAR || opcode == OP_UPTO) { allocate_stack(common, 2); @@ -5017,22 +7166,24 @@ switch(opcode) allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } + if (opcode == OP_UPTO || opcode == OP_CRRANGE) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); label = LABEL(); - compile_char1_hotpath(common, type, cc, &fallback->topfallbacks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); if (opcode == OP_UPTO || opcode == OP_CRRANGE) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - if (opcode == OP_CRRANGE && arg2 > 0) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label); - if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0)) - jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1); + if (opcode == OP_CRRANGE && min > 0) + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label); + if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0)) + jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); } + /* We cannot use TMP3 because of this allocate_stack. */ allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); JUMPTO(SLJIT_JUMP, label); @@ -5041,181 +7192,290 @@ switch(opcode) } else { - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); + if (opcode == OP_PLUS) + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (opcode <= OP_PLUS) + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + else + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); label = LABEL(); - compile_char1_hotpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0)) + compile_char1_matchingpath(common, type, cc, &nomatch); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (opcode <= OP_PLUS) + JUMPTO(SLJIT_JUMP, label); + else if (opcode == OP_CRRANGE && max == 0) { - OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); + OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1); JUMPTO(SLJIT_JUMP, label); } else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, TMP1, 0, base, offset1); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label); } set_jumps(nomatch, LABEL()); - if (opcode == OP_PLUS || opcode == OP_CRRANGE) - add_jump(compiler, &fallback->topfallbacks, - CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (opcode == OP_CRRANGE) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1)); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); } - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); break; case OP_MINSTAR: case OP_MINPLUS: - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (opcode == OP_MINPLUS) - add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + if (private_data_ptr == 0) + allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); break; case OP_MINUPTO: case OP_CRMINRANGE: - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); if (opcode == OP_CRMINRANGE) - add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); + BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); break; case OP_QUERY: case OP_MINQUERY: - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + if (private_data_ptr == 0) + allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); if (opcode == OP_QUERY) - compile_char1_hotpath(common, type, cc, &fallback->topfallbacks); - FALLBACK_AS(iterator_fallback)->hotpath = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); break; case OP_EXACT: - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); label = LABEL(); - compile_char1_hotpath(common, type, cc, &fallback->topfallbacks); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, label); break; case OP_POSSTAR: case OP_POSPLUS: case OP_POSUPTO: - if (opcode != OP_POSSTAR) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); + if (opcode == OP_POSPLUS) + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + if (opcode == OP_POSUPTO) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); label = LABEL(); - compile_char1_hotpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &nomatch); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); if (opcode != OP_POSUPTO) - { - if (opcode == OP_POSPLUS) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2); JUMPTO(SLJIT_JUMP, label); - } else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, label); } set_jumps(nomatch, LABEL()); - if (opcode == OP_POSPLUS) - add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); break; case OP_POSQUERY: - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); - compile_char1_hotpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &nomatch); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); set_jumps(nomatch, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); break; + case OP_CRPOSRANGE: + /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */ + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, label); + + if (max != 0) + { + SLJIT_ASSERT(max - min > 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min); + } + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &nomatch); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + if (max == 0) + JUMPTO(SLJIT_JUMP, label); + else + { + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1); + JUMPTO(SLJIT_C_NOT_ZERO, label); + } + set_jumps(nomatch, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + break; + default: SLJIT_ASSERT_STOP(); break; } -decrease_call_count(common); +count_match(common); return end; } -static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; -PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL); +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); if (*cc == OP_FAIL) { - add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); return cc + 1; } if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL) { /* No need to check notempty conditions. */ - if (common->acceptlabel == NULL) + if (common->accept_label == NULL) add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); else - JUMPTO(SLJIT_JUMP, common->acceptlabel); + JUMPTO(SLJIT_JUMP, common->accept_label); return cc + 1; } -if (common->acceptlabel == NULL) +if (common->accept_label == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0))); else - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); -if (common->acceptlabel == NULL) +if (common->accept_label == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0)); else - CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel); + CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -if (common->acceptlabel == NULL) +if (common->accept_label == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); else - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel); -add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); + CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); return cc + 1; } -static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc) +static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc) { DEFINE_COMPILER; int offset = GET2(cc, 1); +BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; /* Data will be discarded anyway... */ if (common->currententry != NULL) return cc + 1 + IMM2_SIZE; -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset)); +if (!optimized_cbracket) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset)); offset <<= 1; OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); +if (!optimized_cbracket) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); return cc + 1 + IMM2_SIZE; } -static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; -fallback_common *fallback; +backtrack_common *backtrack; +pcre_uchar opcode = *cc; +pcre_uchar *ccend = cc + 1; +if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) + ccend += 2 + cc[1]; + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +if (opcode == OP_SKIP) + { + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + return ccend; + } + +if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + } + +return ccend; +} + +static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP }; + +static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +BOOL needs_control_head; +int size; + +PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); +common->then_trap = BACKTRACK_AS(then_trap_backtrack); +BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; +BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start); +BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head); + +size = BACKTRACK_AS(then_trap_backtrack)->framesize; +size = 3 + (size < 0 ? 0 : size); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); +allocate_stack(common, size); +if (size > 3) + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); +else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); + +size = BACKTRACK_AS(then_trap_backtrack)->framesize; +if (size >= 0) + init_frame(common, cc, ccend, size - 1, 0, FALSE); +} + +static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +BOOL has_then_trap = FALSE; +then_trap_backtrack *save_then_trap = NULL; + +SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); + +if (common->has_then && common->then_offsets[cc - common->start] != 0) + { + SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0); + has_then_trap = TRUE; + save_then_trap = common->then_trap; + /* Tail item on backtrack. */ + compile_then_trap_matchingpath(common, cc, ccend, parent); + } + while (cc < ccend) { switch(*cc) @@ -5250,21 +7510,24 @@ while (cc < ccend) case OP_NOT: case OP_NOTI: case OP_REVERSE: - cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; case OP_SET_SOM: - PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc); + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); allocate_stack(common, 1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); cc++; break; case OP_CHAR: case OP_CHARI: - cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + if (common->mode == JIT_COMPILE) + cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; case OP_STAR: @@ -5332,48 +7595,67 @@ while (cc < ccend) case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: - cc = compile_iterator_hotpath(common, cc, parent); + cc = compile_iterator_matchingpath(common, cc, parent); break; case OP_CLASS: case OP_NCLASS: - if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE) - cc = compile_iterator_hotpath(common, cc, parent); + if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 case OP_XCLASS: - if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE) - cc = compile_iterator_hotpath(common, cc, parent); + if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); break; #endif case OP_REF: case OP_REFI: - if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) - cc = compile_ref_iterator_hotpath(common, cc, parent); + if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); else - cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE); + { + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + cc += 1 + IMM2_SIZE; + } break; + case OP_DNREF: + case OP_DNREFI: + if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); + else + { + compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + cc += 1 + 2 * IMM2_SIZE; + } + break; + case OP_RECURSE: - cc = compile_recurse_hotpath(common, cc, parent); + cc = compile_recurse_matchingpath(common, cc, parent); break; + case OP_CALLOUT: + cc = compile_callout_matchingpath(common, cc, parent); + break; + case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: - PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc); - cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE); + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); break; case OP_BRAMINZERO: - PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc); + PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); cc = bracketend(cc + 1); if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) { @@ -5386,9 +7668,9 @@ while (cc < ccend) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); } - FALLBACK_AS(braminzero_fallback)->hotpath = LABEL(); + BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); if (cc[1] > OP_ASSERTBACK_NOT) - decrease_call_count(common); + count_match(common); break; case OP_ONCE: @@ -5399,16 +7681,16 @@ while (cc < ccend) case OP_SBRA: case OP_SCBRA: case OP_SCOND: - cc = compile_bracket_hotpath(common, cc, parent); + cc = compile_bracket_matchingpath(common, cc, parent); break; case OP_BRAZERO: if (cc[1] > OP_ASSERTBACK_NOT) - cc = compile_bracket_hotpath(common, cc, parent); + cc = compile_bracket_matchingpath(common, cc, parent); else { - PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc); - cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE); + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); } break; @@ -5417,17 +7699,49 @@ while (cc < ccend) case OP_SBRAPOS: case OP_SCBRAPOS: case OP_BRAPOSZERO: - cc = compile_bracketpos_hotpath(common, cc, parent); + cc = compile_bracketpos_matchingpath(common, cc, parent); break; + case OP_MARK: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + SLJIT_ASSERT(common->mark_ptr != 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + allocate_stack(common, common->has_skip_arg ? 5 : 1); + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + if (common->has_skip_arg) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + } + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_PRUNE_ARG: + case OP_SKIP: + case OP_SKIP_ARG: + case OP_THEN: + case OP_THEN_ARG: + case OP_COMMIT: + cc = compile_control_verb_matchingpath(common, cc, parent); + break; + case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - cc = compile_fail_accept_hotpath(common, cc, parent); + cc = compile_fail_accept_matchingpath(common, cc, parent); break; case OP_CLOSE: - cc = compile_close_hotpath(common, cc); + cc = compile_close_matchingpath(common, cc); break; case OP_SKIPZERO: @@ -5441,35 +7755,49 @@ while (cc < ccend) if (cc == NULL) return; } + +if (has_then_trap) + { + /* Head item on backtrack. */ + PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); + BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; + BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap; + common->then_trap = save_then_trap; + } SLJIT_ASSERT(cc == ccend); } -#undef PUSH_FALLBACK -#undef PUSH_FALLBACK_NOVALUE -#undef FALLBACK_AS +#undef PUSH_BACKTRACK +#undef PUSH_BACKTRACK_NOVALUE +#undef BACKTRACK_AS -#define COMPILE_FALLBACKPATH(current) \ +#define COMPILE_BACKTRACKINGPATH(current) \ do \ { \ - compile_fallbackpath(common, (current)); \ + compile_backtrackingpath(common, (current)); \ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ return; \ } \ while (0) -#define CURRENT_AS(type) ((type*)current) +#define CURRENT_AS(type) ((type *)current) -static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; pcre_uchar opcode; pcre_uchar type; -int arg1 = -1, arg2 = -1; +int max = -1, min = -1; struct sljit_label *label = NULL; struct sljit_jump *jump = NULL; +jump_list *jumplist = NULL; +int private_data_ptr = PRIVATE_DATA(cc); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); +int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); -cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL); +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL); switch(opcode) { @@ -5479,100 +7807,112 @@ switch(opcode) case OP_CRRANGE: if (type == OP_ANYNL || type == OP_EXTUNI) { - set_jumps(current->topfallbacks, LABEL()); + SLJIT_ASSERT(private_data_ptr == 0); + set_jumps(current->topbacktracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); } else { - if (opcode == OP_STAR || opcode == OP_UPTO) - arg2 = 0; - else if (opcode == OP_PLUS) - arg2 = 1; - jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1); - OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (opcode == OP_UPTO) + min = 0; + if (opcode <= OP_PLUS) + { + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, base, offset1); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1); + OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); + } skip_char_back(common); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); - if (opcode == OP_PLUS || opcode == OP_CRRANGE) - set_jumps(current->topfallbacks, LABEL()); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); + if (opcode == OP_CRRANGE) + set_jumps(current->topbacktracks, LABEL()); JUMPHERE(jump); - free_stack(common, 2); + if (private_data_ptr == 0) + free_stack(common, 2); + if (opcode == OP_PLUS) + set_jumps(current->topbacktracks, LABEL()); } break; case OP_MINSTAR: case OP_MINPLUS: - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + compile_char1_matchingpath(common, type, cc, &jumplist); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + if (private_data_ptr == 0) + free_stack(common, 1); if (opcode == OP_MINPLUS) - { - set_jumps(current->topfallbacks, LABEL()); - current->topfallbacks = NULL; - } - compile_char1_hotpath(common, type, cc, ¤t->topfallbacks); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); - set_jumps(current->topfallbacks, LABEL()); - free_stack(common, 1); + set_jumps(current->topbacktracks, LABEL()); break; case OP_MINUPTO: case OP_CRMINRANGE: if (opcode == OP_CRMINRANGE) { - set_jumps(current->topfallbacks, LABEL()); - current->topfallbacks = NULL; label = LABEL(); + set_jumps(current->topbacktracks, label); } - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - compile_char1_hotpath(common, type, cc, ¤t->topfallbacks); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + compile_char1_matchingpath(common, type, cc, &jumplist); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, base, offset1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, base, offset1, TMP1, 0); if (opcode == OP_CRMINRANGE) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label); + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label); - if (opcode == OP_CRMINRANGE && arg1 == 0) - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); + if (opcode == OP_CRMINRANGE && max == 0) + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); else - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath); - set_jumps(current->topfallbacks, LABEL()); - free_stack(common, 2); + set_jumps(jumplist, LABEL()); + if (private_data_ptr == 0) + free_stack(common, 2); break; case OP_QUERY: - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); jump = JUMP(SLJIT_JUMP); - set_jumps(current->topfallbacks, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); + set_jumps(current->topbacktracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); JUMPHERE(jump); - free_stack(common, 1); + if (private_data_ptr == 0) + free_stack(common, 1); break; case OP_MINQUERY: - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_hotpath(common, type, cc, ¤t->topfallbacks); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath); - set_jumps(current->topfallbacks, LABEL()); + compile_char1_matchingpath(common, type, cc, &jumplist); + JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); JUMPHERE(jump); - free_stack(common, 1); + if (private_data_ptr == 0) + free_stack(common, 1); break; case OP_EXACT: case OP_POSPLUS: - set_jumps(current->topfallbacks, LABEL()); + case OP_CRPOSRANGE: + set_jumps(current->topbacktracks, LABEL()); break; case OP_POSSTAR: @@ -5586,39 +7926,58 @@ switch(opcode) } } -static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current) +static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); pcre_uchar type; -type = cc[1 + IMM2_SIZE]; +type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; + if ((type & 0x1) == 0) { - set_jumps(current->topfallbacks, LABEL()); + /* Maximize case. */ + set_jumps(current->topbacktracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); return; } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); -CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath); -set_jumps(current->topfallbacks, LABEL()); -free_stack(common, 2); +CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); +set_jumps(current->topbacktracks, LABEL()); +free_stack(common, ref ? 2 : 3); } -static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current) +static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; -set_jumps(current->topfallbacks, LABEL()); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); -free_stack(common, 1); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); +if (CURRENT_AS(recurse_backtrack)->inlined_pattern) + compile_backtrackingpath(common, current->top); +set_jumps(current->topbacktracks, LABEL()); +if (CURRENT_AS(recurse_backtrack)->inlined_pattern) + return; + +if (common->has_set_som && common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); + } +else if (common->has_set_som || common->mark_ptr != 0) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); + } } -static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; @@ -5634,18 +7993,18 @@ if (*cc == OP_BRAZERO) if (bra == OP_BRAZERO) { - SLJIT_ASSERT(current->topfallbacks == NULL); + SLJIT_ASSERT(current->topbacktracks == NULL); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } -if (CURRENT_AS(assert_fallback)->framesize < 0) +if (CURRENT_AS(assert_backtrack)->framesize < 0) { - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); free_stack(common, 1); } return; @@ -5656,7 +8015,7 @@ if (bra == OP_BRAZERO) if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); free_stack(common, 1); return; } @@ -5666,33 +8025,32 @@ if (bra == OP_BRAZERO) if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); } else - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); if (bra == OP_BRAZERO) { /* We know there is enough place on the stack. */ - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); JUMPHERE(brajump); } } -static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current) +static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; -int opcode; +int opcode, stacksize, count; int offset = 0; -int localptr = CURRENT_AS(bracket_fallback)->localptr; -int stacksize; -int count; +int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; +int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; pcre_uchar *cc = current->cc; pcre_uchar *ccbegin; pcre_uchar *ccprev; @@ -5700,12 +8058,14 @@ jump_list *jumplist = NULL; jump_list *jumplistitem = NULL; pcre_uchar bra = OP_BRA; pcre_uchar ket; -assert_fallback *assert; +assert_backtrack *assert; BOOL has_alternatives; +BOOL needs_control_head = FALSE; struct sljit_jump *brazero = NULL; struct sljit_jump *once = NULL; struct sljit_jump *cond = NULL; -struct sljit_label *rminlabel = NULL; +struct sljit_label *rmin_label = NULL; +struct sljit_label *exact_label = NULL; if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -5714,12 +8074,24 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) } opcode = *cc; +ccbegin = bracketend(cc) - 1 - LINK_SIZE; +ket = *ccbegin; +if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0) + { + repeat_ptr = PRIVATE_DATA(ccbegin); + repeat_type = PRIVATE_DATA(ccbegin + 2); + repeat_count = PRIVATE_DATA(ccbegin + 3); + SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0); + if (repeat_type == OP_UPTO) + ket = OP_KETRMAX; + if (repeat_type == OP_MINUPTO) + ket = OP_KETRMIN; + } ccbegin = cc; -ket = *(bracketend(ccbegin) - 1 - LINK_SIZE); cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL; + has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; if (opcode == OP_CBRA || opcode == OP_SCBRA) offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) @@ -5727,11 +8099,27 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KE if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) opcode = OP_ONCE; -if (ket == OP_KETRMAX) +/* Decoding the needs_control_head in framesize. */ +if (opcode == OP_ONCE) { - if (bra != OP_BRAZERO) - free_stack(common, 1); + needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0; + CURRENT_AS(bracket_backtrack)->u.framesize >>= 1; + } + +if (ket != OP_KET && repeat_type != 0) + { + /* TMP1 is used in OP_KETRMIN below. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + if (repeat_type == OP_UPTO) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0); + } + +if (ket == OP_KETRMAX) + { + if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); @@ -5743,23 +8131,34 @@ else if (ket == OP_KETRMIN) if (bra != OP_BRAMINZERO) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - if (opcode >= OP_SBRA || opcode == OP_ONCE) + if (repeat_type != 0) { + /* TMP1 was set a few lines above. */ + CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + /* Drop STR_PTR for non-greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else if (opcode >= OP_SBRA || opcode == OP_ONCE) + { /* Checking zero-length iteration. */ - if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0) - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath); + if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); } + /* Drop STR_PTR for non-greedy plus quantifier. */ if (opcode != OP_ONCE) free_stack(common, 1); } else - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); } - rminlabel = LABEL(); + rmin_label = LABEL(); + if (repeat_type != 0) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); } else if (bra == OP_BRAZERO) { @@ -5767,12 +8166,40 @@ else if (bra == OP_BRAZERO) free_stack(common, 1); brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } +else if (repeat_type == OP_EXACT) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); + exact_label = LABEL(); + } +if (offset != 0) + { + if (common->capture_last_ptr != 0) + { + SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + free_stack(common, 3); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); + } + else if (common->optimized_cbracket[offset >> 1] == 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); + } + } + if (SLJIT_UNLIKELY(opcode == OP_ONCE)) { - if (CURRENT_AS(bracket_fallback)->u.framesize >= 0) + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); } once = JUMP(SLJIT_JUMP); @@ -5825,9 +8252,9 @@ else if (*cc == OP_ALT) cc = ccbegin + GET(ccbegin, 1); } -COMPILE_FALLBACKPATH(current->top); -if (current->topfallbacks) - set_jumps(current->topfallbacks, LABEL()); +COMPILE_BACKTRACKINGPATH(current->top); +if (current->topbacktracks) + set_jumps(current->topbacktracks, LABEL()); if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { @@ -5835,21 +8262,21 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKEL if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) { SLJIT_ASSERT(has_alternatives); - assert = CURRENT_AS(bracket_fallback)->u.assert; + assert = CURRENT_AS(bracket_backtrack)->u.assert; if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); } cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL()); + set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); } - else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL) + else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) { SLJIT_ASSERT(has_alternatives); cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL()); + set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); } else SLJIT_ASSERT(!has_alternatives); @@ -5861,69 +8288,65 @@ if (has_alternatives) do { current->top = NULL; - current->topfallbacks = NULL; - current->nextfallbacks = NULL; + current->topbacktracks = NULL; + current->nextbacktracks = NULL; + /* Conditional blocks always have an additional alternative, even if it is empty. */ if (*cc == OP_ALT) { ccprev = cc + 1 + LINK_SIZE; cc += GET(cc, 1); if (opcode != OP_COND && opcode != OP_SCOND) { - if (localptr != 0 && opcode != OP_ONCE) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + if (opcode != OP_ONCE) + { + if (private_data_ptr != 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } else - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); } - compile_hotpath(common, ccprev, cc, current); + compile_matchingpath(common, ccprev, cc, current); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; } - /* Instructions after the current alternative is succesfully matched. */ - /* There is a similar code in compile_bracket_hotpath. */ + /* Instructions after the current alternative is successfully matched. */ + /* There is a similar code in compile_bracket_matchingpath. */ if (opcode == OP_ONCE) - { - if (CURRENT_AS(bracket_fallback)->u.framesize < 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); - /* TMP2 which is set here used by OP_KETRMAX below. */ - if (ket == OP_KETRMAX) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); - else if (ket == OP_KETRMIN) - { - /* Move the STR_PTR to the localptr. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0); - } - } - else - { - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w)); - if (ket == OP_KETRMAX) - { - /* TMP2 which is set here used by OP_KETRMAX below. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - } - } + match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); stacksize = 0; - if (opcode != OP_ONCE) + if (repeat_type == OP_MINUPTO) + { + /* We need to preserve the counter. TMP2 will be used below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr); stacksize++; + } if (ket != OP_KET || bra != OP_BRA) stacksize++; + if (offset != 0) + { + if (common->capture_last_ptr != 0) + stacksize++; + if (common->optimized_cbracket[offset >> 1] == 0) + stacksize += 2; + } + if (opcode != OP_ONCE) + stacksize++; - if (stacksize > 0) { - if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0) - allocate_stack(common, stacksize); - else - { - /* We know we have place at least for one item on the top of the stack. */ - SLJIT_ASSERT(stacksize == 1); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w)); - } - } + if (stacksize > 0) + allocate_stack(common, stacksize); stacksize = 0; + if (repeat_type == OP_MINUPTO) + { + /* TMP2 was set above. */ + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); + stacksize++; + } + if (ket != OP_KET || bra != OP_BRA) { if (ket != OP_KET) @@ -5933,17 +8356,20 @@ if (has_alternatives) stacksize++; } + if (offset != 0) + stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); + if (opcode != OP_ONCE) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++); - if (offset != 0) + if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr); + /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */ + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0); } - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); if (opcode != OP_ONCE) { @@ -5952,10 +8378,10 @@ if (has_alternatives) jumplist = jumplist->next; } - COMPILE_FALLBACKPATH(current->top); - if (current->topfallbacks) - set_jumps(current->topfallbacks, LABEL()); - SLJIT_ASSERT(!current->nextfallbacks); + COMPILE_BACKTRACKINGPATH(current->top); + if (current->topbacktracks) + set_jumps(current->topbacktracks, LABEL()); + SLJIT_ASSERT(!current->nextbacktracks); } while (*cc == OP_ALT); SLJIT_ASSERT(!jumplist); @@ -5963,87 +8389,105 @@ if (has_alternatives) if (cond != NULL) { SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); - assert = CURRENT_AS(bracket_fallback)->u.assert; + assert = CURRENT_AS(bracket_backtrack)->u.assert; if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) - { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); } JUMPHERE(cond); } /* Free the STR_PTR. */ - if (localptr == 0) + if (private_data_ptr == 0) free_stack(common, 1); } if (offset != 0) { /* Using both tmp register is better for instruction scheduling. */ - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2)); - free_stack(common, 3); + if (common->optimized_cbracket[offset >> 1] != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); + } } else if (opcode == OP_SBRA || opcode == OP_SCOND) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); } else if (opcode == OP_ONCE) { cc = ccbegin + GET(ccbegin, 1); - if (CURRENT_AS(bracket_fallback)->u.framesize >= 0) + stacksize = needs_control_head ? 1 : 0; + + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) { /* Reset head and drop saved frame. */ - stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1; - free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize); + stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1); } else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) { /* The STR_PTR must be released. */ - free_stack(common, 1); + stacksize++; } + free_stack(common, stacksize); JUMPHERE(once); - /* Restore previous localptr */ - if (CURRENT_AS(bracket_fallback)->u.framesize >= 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w)); + /* Restore previous private_data_ptr */ + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw)); else if (ket == OP_KETRMIN) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); /* See the comment below. */ free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); } } -if (ket == OP_KETRMAX) +if (repeat_type == OP_EXACT) { + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0); + CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); + } +else if (ket == OP_KETRMAX) + { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath); + if (bra != OP_BRAZERO) + free_stack(common, 1); + + CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); JUMPHERE(brazero); + free_stack(common, 1); } - free_stack(common, 1); } else if (ket == OP_KETRMIN) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - /* OP_ONCE removes everything in case of a fallback, so we don't + /* OP_ONCE removes everything in case of a backtrack, so we don't need to explicitly release the STR_PTR. The extra release would affect badly the free_stack(2) above. */ if (opcode != OP_ONCE) free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel); + CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); if (opcode == OP_ONCE) free_stack(common, bra == OP_BRAMINZERO ? 2 : 1); else if (bra == OP_BRAMINZERO) @@ -6052,18 +8496,18 @@ else if (ket == OP_KETRMIN) else if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); JUMPHERE(brazero); } } -static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current) +static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; int offset; struct sljit_jump *jump; -if (CURRENT_AS(bracketpos_fallback)->framesize < 0) +if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) { if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) { @@ -6071,59 +8515,157 @@ if (CURRENT_AS(bracketpos_fallback)->framesize < 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0); } - set_jumps(current->topfallbacks, LABEL()); - free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize); + set_jumps(current->topbacktracks, LABEL()); + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); return; } -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); -if (current->topfallbacks) +if (current->topbacktracks) { jump = JUMP(SLJIT_JUMP); - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); /* Drop the stack frame. */ - free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize); + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); JUMPHERE(jump); } -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw)); } -static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current) +static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current) { -assert_fallback fallback; +assert_backtrack backtrack; current->top = NULL; -current->topfallbacks = NULL; -current->nextfallbacks = NULL; +current->topbacktracks = NULL; +current->nextbacktracks = NULL; if (current->cc[1] > OP_ASSERTBACK_NOT) { - /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */ - compile_bracket_hotpath(common, current->cc, current); - compile_bracket_fallbackpath(common, current->top); + /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */ + compile_bracket_matchingpath(common, current->cc, current); + compile_bracket_backtrackingpath(common, current->top); } else { - memset(&fallback, 0, sizeof(fallback)); - fallback.common.cc = current->cc; - fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath; - /* Manual call of compile_assert_hotpath. */ - compile_assert_hotpath(common, current->cc, &fallback, FALSE); + memset(&backtrack, 0, sizeof(backtrack)); + backtrack.common.cc = current->cc; + backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath; + /* Manual call of compile_assert_matchingpath. */ + compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); } -SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks); +SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); } -static void compile_fallbackpath(compiler_common *common, struct fallback_common *current) +static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; +pcre_uchar opcode = *current->cc; +struct sljit_label *loop; +struct sljit_jump *jump; +if (opcode == OP_THEN || opcode == OP_THEN_ARG) + { + if (common->then_trap != NULL) + { + SLJIT_ASSERT(common->control_head_ptr != 0); + + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start); + jump = JUMP(SLJIT_JUMP); + + loop = LABEL(); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw)); + JUMPHERE(jump); + CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop); + CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop); + add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP)); + return; + } + else if (common->positive_assert) + { + add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP)); + return; + } + } + +if (common->local_exit) + { + if (common->quit_label == NULL) + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->quit_label); + return; + } + +if (opcode == OP_SKIP_ARG) + { + SLJIT_ASSERT(common->control_head_ptr != 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); + sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); + + OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); + add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1)); + return; + } + +if (opcode == OP_SKIP) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0); +add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); +} + +static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +int size; + +if (CURRENT_AS(then_trap_backtrack)->then_trap) + { + common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap; + return; + } + +size = CURRENT_AS(then_trap_backtrack)->framesize; +size = 3 + (size < 0 ? 0 : size); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3)); +free_stack(common, size); +jump = JUMP(SLJIT_JUMP); + +set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL()); +/* STACK_TOP is set by THEN. */ +if (CURRENT_AS(then_trap_backtrack)->framesize >= 0) + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +free_stack(common, 3); + +JUMPHERE(jump); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0); +} + +static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +then_trap_backtrack *save_then_trap = common->then_trap; + while (current) { - if (current->nextfallbacks != NULL) - set_jumps(current->nextfallbacks, LABEL()); + if (current->nextbacktracks != NULL) + set_jumps(current->nextbacktracks, LABEL()); switch(*current->cc) { case OP_SET_SOM: @@ -6202,23 +8744,25 @@ while (current) #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: #endif - compile_iterator_fallbackpath(common, current); + compile_iterator_backtrackingpath(common, current); break; case OP_REF: case OP_REFI: - compile_ref_iterator_fallbackpath(common, current); + case OP_DNREF: + case OP_DNREFI: + compile_ref_iterator_backtrackingpath(common, current); break; case OP_RECURSE: - compile_recurse_fallbackpath(common, current); + compile_recurse_backtrackingpath(common, current); break; case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: - compile_assert_fallbackpath(common, current); + compile_assert_backtrackingpath(common, current); break; case OP_ONCE: @@ -6229,14 +8773,14 @@ while (current) case OP_SBRA: case OP_SCBRA: case OP_SCOND: - compile_bracket_fallbackpath(common, current); + compile_bracket_backtrackingpath(common, current); break; case OP_BRAZERO: if (current->cc[1] > OP_ASSERTBACK_NOT) - compile_bracket_fallbackpath(common, current); + compile_bracket_backtrackingpath(common, current); else - compile_assert_fallbackpath(common, current); + compile_assert_backtrackingpath(common, current); break; case OP_BRAPOS: @@ -6244,25 +8788,60 @@ while (current) case OP_SBRAPOS: case OP_SCBRAPOS: case OP_BRAPOSZERO: - compile_bracketpos_fallbackpath(common, current); + compile_bracketpos_backtrackingpath(common, current); break; case OP_BRAMINZERO: - compile_braminzero_fallbackpath(common, current); + compile_braminzero_backtrackingpath(common, current); break; + case OP_MARK: + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0)); + if (common->has_skip_arg) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, common->has_skip_arg ? 5 : 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); + if (common->has_skip_arg) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0); + break; + + case OP_THEN: + case OP_THEN_ARG: + case OP_PRUNE: + case OP_PRUNE_ARG: + case OP_SKIP: + case OP_SKIP_ARG: + compile_control_verb_backtrackingpath(common, current); + break; + + case OP_COMMIT: + if (!common->local_exit) + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); + if (common->quit_label == NULL) + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->quit_label); + break; + + case OP_CALLOUT: case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - set_jumps(current->topfallbacks, LABEL()); + set_jumps(current->topbacktracks, LABEL()); break; + case OP_THEN_TRAP: + /* A virtual opcode for then traps. */ + compile_then_trap_backtrackingpath(common, current); + break; + default: SLJIT_ASSERT_STOP(); break; } current = current->prev; } +common->then_trap = save_then_trap; } static SLJIT_INLINE void compile_recurse(compiler_common *common) @@ -6271,112 +8850,151 @@ DEFINE_COMPILER; pcre_uchar *cc = common->start + common->currententry->start; pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); pcre_uchar *ccend = bracketend(cc); -int localsize = get_localsize(common, ccbegin, ccend); -int framesize = get_framesize(common, cc, TRUE); +BOOL needs_control_head; +int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head); +int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head); int alternativesize; -BOOL needsframe; -fallback_common altfallback; +BOOL needs_frame; +backtrack_common altbacktrack; struct sljit_jump *jump; +/* Recurse captures then. */ +common->then_trap = NULL; + SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS); -needsframe = framesize >= 0; -if (!needsframe) +needs_frame = framesize >= 0; +if (!needs_frame) framesize = 0; alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0; -SLJIT_ASSERT(common->currententry->entry == NULL); +SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0); common->currententry->entry = LABEL(); set_jumps(common->currententry->calls, common->currententry->entry); -sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize); -allocate_stack(common, localsize + framesize + alternativesize); -OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0); -copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0); -if (needsframe) - init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE); +sljit_emit_fast_enter(compiler, TMP2, 0); +allocate_stack(common, private_data_size + framesize + alternativesize); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0); +copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head); +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0); +if (needs_frame) + init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE); if (alternativesize > 0) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); -memset(&altfallback, 0, sizeof(fallback_common)); -common->acceptlabel = NULL; +memset(&altbacktrack, 0, sizeof(backtrack_common)); +common->quit_label = NULL; +common->accept_label = NULL; +common->quit = NULL; common->accept = NULL; -altfallback.cc = ccbegin; +altbacktrack.cc = ccbegin; cc += GET(cc, 1); while (1) { - altfallback.top = NULL; - altfallback.topfallbacks = NULL; + altbacktrack.top = NULL; + altbacktrack.topbacktracks = NULL; - if (altfallback.cc != ccbegin) + if (altbacktrack.cc != ccbegin) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - compile_hotpath(common, altfallback.cc, cc, &altfallback); + compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); - compile_fallbackpath(common, altfallback.top); + compile_backtrackingpath(common, altbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - set_jumps(altfallback.topfallbacks, LABEL()); + set_jumps(altbacktrack.topbacktracks, LABEL()); if (*cc != OP_ALT) break; - altfallback.cc = cc + 1 + LINK_SIZE; + altbacktrack.cc = cc + 1 + LINK_SIZE; cc += GET(cc, 1); } + /* None of them matched. */ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); jump = JUMP(SLJIT_JUMP); +if (common->quit != NULL) + { + set_jumps(common->quit, LABEL()); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr); + if (needs_frame) + { + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); + } + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); + common->quit = NULL; + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + } + set_jumps(common->accept, LABEL()); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD); -if (needsframe) +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr); +if (needs_frame) { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w)); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); } OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); JUMPHERE(jump); -copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize); -free_stack(common, localsize + framesize + alternativesize); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w)); -OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0); +if (common->quit != NULL) + set_jumps(common->quit, LABEL()); +copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head); +free_stack(common, private_data_size + framesize + alternativesize); +if (needs_control_head) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0); + } +else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0); + } sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); } -#undef COMPILE_FALLBACKPATH +#undef COMPILE_BACKTRACKINGPATH #undef CURRENT_AS void -PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra) +PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode) { struct sljit_compiler *compiler; -fallback_common rootfallback; +backtrack_common rootbacktrack; compiler_common common_data; compiler_common *common = &common_data; const pcre_uint8 *tables = re->tables; pcre_study_data *study; +int private_data_size; pcre_uchar *ccend; -executable_function *function; +executable_functions *functions; void *executable_func; sljit_uw executable_size; -struct sljit_label *leave; -struct sljit_label *mainloop = NULL; -struct sljit_label *empty_match_found; -struct sljit_label *empty_match_fallback; -struct sljit_jump *alloc_error; +struct sljit_label *mainloop_label = NULL; +struct sljit_label *continue_match_label; +struct sljit_label *empty_match_found_label; +struct sljit_label *empty_match_backtrack_label; +struct sljit_label *reset_match_label; +struct sljit_jump *jump; +struct sljit_jump *minlength_check_failed = NULL; struct sljit_jump *reqbyte_notfound = NULL; struct sljit_jump *empty_match; +struct sljit_label *quit_label; SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0); study = extra->study_data; @@ -6384,20 +9002,20 @@ study = extra->study_data; if (!tables) tables = PRIV(default_tables); -memset(&rootfallback, 0, sizeof(fallback_common)); -rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; +memset(&rootbacktrack, 0, sizeof(backtrack_common)); +memset(common, 0, sizeof(compiler_common)); +rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; -common->compiler = NULL; -common->start = rootfallback.cc; -common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w); +common->start = rootbacktrack.cc; common->fcc = tables + fcc_offset; -common->lcc = (sljit_w)(tables + lcc_offset); +common->lcc = (sljit_sw)(tables + lcc_offset); +common->mode = mode; common->nltype = NLTYPE_FIXED; switch(re->options & PCRE_NEWLINE_BITS) { case 0: /* Compile-time default */ - switch (NEWLINE) + switch(NEWLINE) { case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; @@ -6425,181 +9043,325 @@ else #endif } common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; -common->ctypes = (sljit_w)(tables + ctypes_offset); -common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset); +common->ctypes = (sljit_sw)(tables + ctypes_offset); +common->digits[0] = -2; +common->name_table = ((pcre_uchar *)re) + re->name_table_offset; common->name_count = re->name_count; common->name_entry_size = re->name_entry_size; -common->acceptlabel = NULL; -common->stubs = NULL; -common->entries = NULL; -common->currententry = NULL; -common->accept = NULL; -common->calllimit = NULL; -common->stackalloc = NULL; -common->revertframes = NULL; -common->wordboundary = NULL; -common->anynewline = NULL; -common->hspace = NULL; -common->vspace = NULL; -common->casefulcmp = NULL; -common->caselesscmp = NULL; common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; #ifdef SUPPORT_UTF -/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ common->utf = (re->options & PCRE_UTF8) != 0; #ifdef SUPPORT_UCP common->use_ucp = (re->options & PCRE_UCP) != 0; #endif -common->utfreadchar = NULL; -#ifdef COMPILE_PCRE8 -common->utfreadtype8 = NULL; -#endif #endif /* SUPPORT_UTF */ -#ifdef SUPPORT_UCP -common->getucd = NULL; +ccend = bracketend(rootbacktrack.cc); + +/* Calculate the local space size on the stack. */ +common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); +common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1); +if (!common->optimized_cbracket) + return; +#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 +memset(common->optimized_cbracket, 0, re->top_bracket + 1); +#else +memset(common->optimized_cbracket, 1, re->top_bracket + 1); #endif -ccend = bracketend(rootfallback.cc); -SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); -common->localsize = get_localspace(common, rootfallback.cc, ccend); -if (common->localsize < 0) + +SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); +#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 +common->capture_last_ptr = common->ovector_start; +common->ovector_start += sizeof(sljit_sw); +#endif +if (!check_opcode_types(common, rootbacktrack.cc, ccend)) + { + SLJIT_FREE(common->optimized_cbracket); return; -common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w); -if (common->localsize > SLJIT_MAX_LOCAL_SIZE) + } + +/* Checking flags and updating ovector_start. */ +if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) + { + common->req_char_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +if (mode != JIT_COMPILE) + { + common->start_used_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + if (mode == JIT_PARTIAL_SOFT_COMPILE) + { + common->hit_start = common->ovector_start; + common->ovector_start += 2 * sizeof(sljit_sw); + } + else + { + SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE); + common->needs_start_ptr = TRUE; + } + } +if ((re->options & PCRE_FIRSTLINE) != 0) + { + common->first_line_end = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +common->control_head_ptr = 1; +#endif +if (common->control_head_ptr != 0) + { + common->control_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +if (common->needs_start_ptr && common->has_set_som) + { + /* Saving the real start pointer is necessary. */ + common->start_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +else + common->needs_start_ptr = FALSE; + +/* Aligning ovector to even number of sljit words. */ +if ((common->ovector_start & sizeof(sljit_sw)) != 0) + common->ovector_start += sizeof(sljit_sw); + +if (common->start_ptr == 0) + common->start_ptr = OVECTOR(0); + +/* Capturing brackets cannot be optimized if callouts are allowed. */ +if (common->capture_last_ptr != 0) + memset(common->optimized_cbracket, 0, re->top_bracket + 1); + +SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); +common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); + +common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si)); +if (!common->private_data_ptrs) + { + SLJIT_FREE(common->optimized_cbracket); return; -common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int)); -if (!common->localptrs) + } +memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int)); + +private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); +set_private_data_ptrs(common, &private_data_size, ccend); +if (private_data_size > SLJIT_MAX_LOCAL_SIZE) + { + SLJIT_FREE(common->private_data_ptrs); + SLJIT_FREE(common->optimized_cbracket); return; -memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int)); -set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend); + } +if (common->has_then) + { + common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc); + if (!common->then_offsets) + { + SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->private_data_ptrs); + return; + } + memset(common->then_offsets, 0, ccend - rootbacktrack.cc); + set_then_offsets(common, rootbacktrack.cc, NULL); + } + compiler = sljit_create_compiler(); if (!compiler) { - SLJIT_FREE(common->localptrs); + SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->private_data_ptrs); + if (common->has_then) + SLJIT_FREE(common->then_offsets); return; } common->compiler = compiler; /* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 1, 5, 5, common->localsize); +sljit_emit_enter(compiler, 1, 5, 5, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); -if ((re->flags & PCRE_REQCHSET) != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0); +if (common->req_char_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0); OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit)); +OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0); +if (mode == JIT_PARTIAL_SOFT_COMPILE) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0); +if (common->control_head_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + /* Main part of the matching */ if ((re->options & PCRE_ANCHORED) == 0) { - mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); + mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); + continue_match_label = LABEL(); /* Forward search if possible. */ - if ((re->flags & PCRE_FIRSTSET) != 0) - fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) != 0) - fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) - fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) + { + if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0)) + { /* Do nothing */ } + else if ((re->flags & PCRE_FIRSTSET) != 0) + fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); + else if ((re->flags & PCRE_STARTLINE) != 0) + fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); + else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) + fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + } } -if ((re->flags & PCRE_REQCHSET) != 0) - reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); +else + continue_match_label = LABEL(); +if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) + { + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); + minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0); + } +if (common->req_char_ptr != 0) + reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); + /* Store the current STR_PTR in OVECTOR(0). */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); /* Copy the limit of allowed recursions. */ -OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT); +OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH); +if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1); -compile_hotpath(common, rootfallback.cc, ccend, &rootfallback); +if (common->needs_start_ptr) + { + SLJIT_ASSERT(common->start_ptr != OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0); + } +else + SLJIT_ASSERT(common->start_ptr == OVECTOR(0)); + +/* Copy the beginning of the string. */ +if (mode == JIT_PARTIAL_SOFT_COMPILE) + { + jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); + JUMPHERE(jump); + } +else if (mode == JIT_PARTIAL_HARD_COMPILE) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + +compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->localptrs); + SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->private_data_ptrs); + if (common->has_then) + SLJIT_FREE(common->then_offsets); return; } empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); -empty_match_found = LABEL(); +empty_match_found_label = LABEL(); -common->acceptlabel = LABEL(); +common->accept_label = LABEL(); if (common->accept != NULL) - set_jumps(common->accept, common->acceptlabel); + set_jumps(common->accept, common->accept_label); /* This means we have a match. Update the ovector. */ copy_ovector(common, re->top_bracket + 1); -leave = LABEL(); +common->quit_label = common->forced_quit_label = LABEL(); +if (common->quit != NULL) + set_jumps(common->quit, common->quit_label); +if (common->forced_quit != NULL) + set_jumps(common->forced_quit, common->forced_quit_label); +if (minlength_check_failed != NULL) + SET_LABEL(minlength_check_failed, common->forced_quit_label); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); -empty_match_fallback = LABEL(); -compile_fallbackpath(common, rootfallback.top); +if (mode != JIT_COMPILE) + { + common->partialmatchlabel = LABEL(); + set_jumps(common->partialmatch, common->partialmatchlabel); + return_with_partial_match(common, common->quit_label); + } + +empty_match_backtrack_label = LABEL(); +compile_backtrackingpath(common, rootbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->localptrs); + SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->private_data_ptrs); + if (common->has_then) + SLJIT_FREE(common->then_offsets); return; } -SLJIT_ASSERT(rootfallback.prev == NULL); +SLJIT_ASSERT(rootbacktrack.prev == NULL); +reset_match_label = LABEL(); +if (mode == JIT_PARTIAL_SOFT_COMPILE) + { + /* Update hit_start only in the first time. */ + jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0); + JUMPHERE(jump); + } + /* Check we have remaining characters. */ -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); +if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0) + { + SLJIT_ASSERT(common->first_line_end != 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); + } +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr); + if ((re->options & PCRE_ANCHORED) == 0) { if ((re->options & PCRE_FIRSTLINE) == 0) - { - if (study != NULL && study->minlength > 1) - { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); - CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop); - } - else - CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); - } + CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label); else - { - if (study != NULL && study->minlength > 1) - { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); - COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); - COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL); - JUMPTO(SLJIT_C_ZERO, mainloop); - } - else - CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop); - } + CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label); } +/* No more remaining characters. */ if (reqbyte_notfound != NULL) JUMPHERE(reqbyte_notfound); -/* Copy OVECTOR(1) to OVECTOR(0) */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); + +if (mode == JIT_PARTIAL_SOFT_COMPILE) + CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); -JUMPTO(SLJIT_JUMP, leave); +JUMPTO(SLJIT_JUMP, common->quit_label); flush_stubs(common); JUMPHERE(empty_match); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback); +CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); -CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found); +CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found); -JUMPTO(SLJIT_JUMP, empty_match_fallback); +CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); +JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); common->currententry = common->entries; +common->local_exit = TRUE; +quit_label = common->quit_label; while (common->currententry != NULL) { /* Might add new entries. */ @@ -6607,18 +9369,23 @@ while (common->currententry != NULL) if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->localptrs); + SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->private_data_ptrs); + if (common->has_then) + SLJIT_FREE(common->then_offsets); return; } flush_stubs(common); common->currententry = common->currententry->next; } +common->local_exit = FALSE; +common->quit_label = quit_label; /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. */ -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize); +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); @@ -6626,7 +9393,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); -alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); @@ -6635,15 +9402,15 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); /* Allocation failed. */ -JUMPHERE(alloc_error); +JUMPHERE(jump); /* We break the return address cache here, but this is a really rare case. */ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); -JUMPTO(SLJIT_JUMP, leave); +JUMPTO(SLJIT_JUMP, common->quit_label); /* Call limit reached. */ set_jumps(common->calllimit, LABEL()); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); -JUMPTO(SLJIT_JUMP, leave); +JUMPTO(SLJIT_JUMP, common->quit_label); if (common->revertframes != NULL) { @@ -6680,20 +9447,30 @@ if (common->caselesscmp != NULL) set_jumps(common->caselesscmp, LABEL()); do_caselesscmp(common); } +if (common->reset_match != NULL) + { + set_jumps(common->reset_match, LABEL()); + do_reset_match(common, (re->top_bracket + 1) * 2); + CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); + OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); + JUMPTO(SLJIT_JUMP, reset_match_label); + } #ifdef SUPPORT_UTF +#ifndef COMPILE_PCRE32 if (common->utfreadchar != NULL) { set_jumps(common->utfreadchar, LABEL()); do_utfreadchar(common); } +#endif /* !COMPILE_PCRE32 */ #ifdef COMPILE_PCRE8 if (common->utfreadtype8 != NULL) { set_jumps(common->utfreadtype8, LABEL()); do_utfreadtype8(common); } -#endif #endif /* COMPILE_PCRE8 */ +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP if (common->getucd != NULL) { @@ -6702,117 +9479,246 @@ if (common->getucd != NULL) } #endif -SLJIT_FREE(common->localptrs); +SLJIT_FREE(common->optimized_cbracket); +SLJIT_FREE(common->private_data_ptrs); +if (common->has_then) + SLJIT_FREE(common->then_offsets); + executable_func = sljit_generate_code(compiler); executable_size = sljit_get_generated_code_size(compiler); sljit_free_compiler(compiler); if (executable_func == NULL) return; -function = SLJIT_MALLOC(sizeof(executable_function)); -if (function == NULL) +/* Reuse the function descriptor if possible. */ +if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) + functions = (executable_functions *)extra->executable_jit; +else { - /* This case is highly unlikely since we just recently - freed a lot of memory. Although not impossible. */ - sljit_free_code(executable_func); - return; + /* Note: If your memory-checker has flagged the allocation below as a + * memory leak, it is probably because you either forgot to call + * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or + * pcre16_extra) object, or you called said function after having + * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field + * of the object. (The function will only free the JIT data if the + * bit remains set, as the bit indicates that the pointer to the data + * is valid.) + */ + functions = SLJIT_MALLOC(sizeof(executable_functions)); + if (functions == NULL) + { + /* This case is highly unlikely since we just recently + freed a lot of memory. Although not impossible. */ + sljit_free_code(executable_func); + return; + } + memset(functions, 0, sizeof(executable_functions)); + functions->top_bracket = (re->top_bracket + 1) * 2; + functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0; + extra->executable_jit = functions; + extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; } -function->executable_func = executable_func; -function->executable_size = executable_size; -function->callback = NULL; -function->userdata = NULL; -extra->executable_jit = function; -extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; +functions->executable_funcs[mode] = executable_func; +functions->executable_sizes[mode] = executable_size; } -static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function) +static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func) { union { void* executable_func; jit_function call_executable_func; } convert_executable_func; -pcre_uint8 local_area[LOCAL_SPACE_SIZE]; +pcre_uint8 local_space[MACHINE_STACK_SIZE]; struct sljit_stack local_stack; -local_stack.top = (sljit_w)&local_area; +local_stack.top = (sljit_sw)&local_space; local_stack.base = local_stack.top; -local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE; +local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; local_stack.max_limit = local_stack.limit; arguments->stack = &local_stack; -convert_executable_func.executable_func = function->executable_func; +convert_executable_func.executable_func = executable_func; return convert_executable_func.call_executable_func(arguments); } int -PRIV(jit_exec)(const REAL_PCRE *re, void *executable_func, - const pcre_uchar *subject, int length, int start_offset, int options, - int match_limit, int *offsets, int offsetcount) +PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, + int length, int start_offset, int options, int *offsets, int offset_count) { -executable_function *function = (executable_function*)executable_func; +executable_functions *functions = (executable_functions *)extra_data->executable_jit; union { void* executable_func; jit_function call_executable_func; } convert_executable_func; jit_arguments arguments; -int maxoffsetcount; +int max_offset_count; int retval; +int mode = JIT_COMPILE; +if ((options & PCRE_PARTIAL_HARD) != 0) + mode = JIT_PARTIAL_HARD_COMPILE; +else if ((options & PCRE_PARTIAL_SOFT) != 0) + mode = JIT_PARTIAL_SOFT_COMPILE; + +if (functions->executable_funcs[mode] == NULL) + return PCRE_ERROR_JIT_BADOPTION; + /* Sanity checks should be handled by pcre_exec. */ -arguments.stack = NULL; arguments.str = subject + start_offset; arguments.begin = subject; arguments.end = subject + length; -arguments.calllimit = match_limit; /* JIT decreases this value less times. */ +arguments.mark_ptr = NULL; +/* JIT decreases this value less frequently than the interpreter. */ +arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); +if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) + arguments.limit_match = functions->limit_match; arguments.notbol = (options & PCRE_NOTBOL) != 0; arguments.noteol = (options & PCRE_NOTEOL) != 0; arguments.notempty = (options & PCRE_NOTEMPTY) != 0; arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; arguments.offsets = offsets; +arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; +arguments.real_offset_count = offset_count; -/* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of +/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of the output vector for storing captured strings, with the remainder used as workspace. We don't need the workspace here. For compatibility, we limit the number of captured strings in the same way as pcre_exec(), so that the user gets the same result with and without JIT. */ -if (offsetcount != 2) - offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3; -maxoffsetcount = (re->top_bracket + 1) * 2; -if (offsetcount > maxoffsetcount) - offsetcount = maxoffsetcount; -arguments.offsetcount = offsetcount; +if (offset_count != 2) + offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; +max_offset_count = functions->top_bracket; +if (offset_count > max_offset_count) + offset_count = max_offset_count; +arguments.offset_count = offset_count; -if (function->callback) - arguments.stack = (struct sljit_stack*)function->callback(function->userdata); +if (functions->callback) + arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); else - arguments.stack = (struct sljit_stack*)function->userdata; + arguments.stack = (struct sljit_stack *)functions->userdata; if (arguments.stack == NULL) - retval = jit_machine_stack_exec(&arguments, function); + retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); else { - convert_executable_func.executable_func = function->executable_func; + convert_executable_func.executable_func = functions->executable_funcs[mode]; retval = convert_executable_func.call_executable_func(&arguments); } -if (retval * 2 > offsetcount) +if (retval * 2 > offset_count) retval = 0; +if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) + *(extra_data->mark) = arguments.mark_ptr; + return retval; } +#if defined COMPILE_PCRE8 +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data, + PCRE_SPTR subject, int length, int start_offset, int options, + int *offsets, int offset_count, pcre_jit_stack *stack) +#elif defined COMPILE_PCRE16 +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, + PCRE_SPTR16 subject, int length, int start_offset, int options, + int *offsets, int offset_count, pcre16_jit_stack *stack) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, + PCRE_SPTR32 subject, int length, int start_offset, int options, + int *offsets, int offset_count, pcre32_jit_stack *stack) +#endif +{ +pcre_uchar *subject_ptr = (pcre_uchar *)subject; +executable_functions *functions = (executable_functions *)extra_data->executable_jit; +union { + void* executable_func; + jit_function call_executable_func; +} convert_executable_func; +jit_arguments arguments; +int max_offset_count; +int retval; +int mode = JIT_COMPILE; + +SLJIT_UNUSED_ARG(argument_re); + +/* Plausibility checks */ +if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION; + +if ((options & PCRE_PARTIAL_HARD) != 0) + mode = JIT_PARTIAL_HARD_COMPILE; +else if ((options & PCRE_PARTIAL_SOFT) != 0) + mode = JIT_PARTIAL_SOFT_COMPILE; + +if (functions->executable_funcs[mode] == NULL) + return PCRE_ERROR_JIT_BADOPTION; + +/* Sanity checks should be handled by pcre_exec. */ +arguments.stack = (struct sljit_stack *)stack; +arguments.str = subject_ptr + start_offset; +arguments.begin = subject_ptr; +arguments.end = subject_ptr + length; +arguments.mark_ptr = NULL; +/* JIT decreases this value less frequently than the interpreter. */ +arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); +if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) + arguments.limit_match = functions->limit_match; +arguments.notbol = (options & PCRE_NOTBOL) != 0; +arguments.noteol = (options & PCRE_NOTEOL) != 0; +arguments.notempty = (options & PCRE_NOTEMPTY) != 0; +arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; +arguments.offsets = offsets; +arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; +arguments.real_offset_count = offset_count; + +/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of +the output vector for storing captured strings, with the remainder used as +workspace. We don't need the workspace here. For compatibility, we limit the +number of captured strings in the same way as pcre_exec(), so that the user +gets the same result with and without JIT. */ + +if (offset_count != 2) + offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; +max_offset_count = functions->top_bracket; +if (offset_count > max_offset_count) + offset_count = max_offset_count; +arguments.offset_count = offset_count; + +convert_executable_func.executable_func = functions->executable_funcs[mode]; +retval = convert_executable_func.call_executable_func(&arguments); + +if (retval * 2 > offset_count) + retval = 0; +if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) + *(extra_data->mark) = arguments.mark_ptr; + +return retval; +} + void -PRIV(jit_free)(void *executable_func) +PRIV(jit_free)(void *executable_funcs) { -executable_function *function = (executable_function*)executable_func; -sljit_free_code(function->executable_func); -SLJIT_FREE(function); +int i; +executable_functions *functions = (executable_functions *)executable_funcs; +for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) + { + if (functions->executable_funcs[i] != NULL) + sljit_free_code(functions->executable_funcs[i]); + } +SLJIT_FREE(functions); } int -PRIV(jit_get_size)(void *executable_func) +PRIV(jit_get_size)(void *executable_funcs) { -return ((executable_function*)executable_func)->executable_size; +int i; +sljit_uw size = 0; +sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; +for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) + size += executable_sizes[i]; +return (int)size; } const char* @@ -6821,12 +9727,15 @@ PRIV(jit_get_target)(void) return sljit_get_platform_name(); } -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DECL pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DECL pcre16_jit_stack * pcre16_jit_stack_alloc(int startsize, int maxsize) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL pcre32_jit_stack * +pcre32_jit_stack_alloc(int startsize, int maxsize) #endif { if (startsize < 1 || maxsize < 1) @@ -6838,47 +9747,70 @@ maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_ return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize); } -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *stack) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *stack) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_jit_stack_free(pcre32_jit_stack *stack) #endif { -sljit_free_stack((struct sljit_stack*)stack); +sljit_free_stack((struct sljit_stack *)stack); } -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) #endif { -executable_function *function; +executable_functions *functions; if (extra != NULL && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) { - function = (executable_function*)extra->executable_jit; - function->callback = callback; - function->userdata = userdata; + functions = (executable_functions *)extra->executable_jit; + functions->callback = callback; + functions->userdata = userdata; } } +#if defined COMPILE_PCRE8 +PCRE_EXP_DECL void +pcre_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE16 +PCRE_EXP_DECL void +pcre16_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_jit_free_unused_memory(void) +#endif +{ +sljit_free_unused_memory_exec(); +} + #else /* SUPPORT_JIT */ /* These are dummy functions to avoid linking errors when JIT support is not being compiled. */ -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DECL pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DECL pcre16_jit_stack * pcre16_jit_stack_alloc(int startsize, int maxsize) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL pcre32_jit_stack * +pcre32_jit_stack_alloc(int startsize, int maxsize) #endif { (void)startsize; @@ -6886,28 +9818,47 @@ pcre16_jit_stack_alloc(int startsize, int maxsize) return NULL; } -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *stack) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *stack) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_jit_stack_free(pcre32_jit_stack *stack) #endif { (void)stack; } -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) #endif { (void)extra; (void)callback; (void)userdata; +} + +#if defined COMPILE_PCRE8 +PCRE_EXP_DECL void +pcre_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE16 +PCRE_EXP_DECL void +pcre16_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_jit_free_unused_memory(void) +#endif +{ } #endif