--- embedaddon/pcre/pcre_study.c 2012/02/21 23:50:25 1.1.1.2 +++ embedaddon/pcre/pcre_study.c 2014/06/15 19:46:03 1.1.1.5 @@ -66,8 +66,9 @@ string of that length that matches. In UTF8 mode, the rather than bytes. Arguments: + re compiled pattern block code pointer to start of group (the bracket) - startcode pointer to start of the whole pattern + startcode pointer to start of the whole pattern's code options the compiling options int RECURSE depth @@ -78,8 +79,8 @@ Returns: the minimum length */ static int -find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, - int recurse_depth) +find_minlength(const REAL_PCRE *re, const pcre_uchar *code, + const pcre_uchar *startcode, int options, int recurse_depth) { int length = -1; /* PCRE_UTF16 has the same value as PCRE_UTF8. */ @@ -98,7 +99,7 @@ for (;;) { int d, min; pcre_uchar *cs, *ce; - register int op = *cc; + register pcre_uchar op = *cc; switch (op) { @@ -129,7 +130,7 @@ for (;;) case OP_SBRAPOS: case OP_ONCE: case OP_ONCE_NC: - d = find_minlength(cc, startcode, options, recurse_depth); + d = find_minlength(re, cc, startcode, options, recurse_depth); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -175,9 +176,9 @@ for (;;) case OP_REVERSE: case OP_CREF: - case OP_NCREF: + case OP_DNCREF: case OP_RREF: - case OP_NRREF: + case OP_DNRREF: case OP_DEF: case OP_CALLOUT: case OP_SOD: @@ -323,20 +324,25 @@ for (;;) /* Check a class for variable quantification */ -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 - case OP_XCLASS: - cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; - /* Fall through */ -#endif - case OP_CLASS: case OP_NCLASS: +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + case OP_XCLASS: + /* The original code caused an unsigned overflow in 64 bit systems, + so now we use a conditional statement. */ + if (op == OP_XCLASS) + cc += GET(cc, 1); + else + cc += PRIV(OP_lengths)[OP_CLASS]; +#else cc += PRIV(OP_lengths)[OP_CLASS]; +#endif switch (*cc) { case OP_CRPLUS: case OP_CRMINPLUS: + case OP_CRPOSPLUS: branchlength++; /* Fall through */ @@ -344,11 +350,14 @@ for (;;) case OP_CRMINSTAR: case OP_CRQUERY: case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSQUERY: cc++; break; case OP_CRRANGE: case OP_CRMINRANGE: + case OP_CRPOSRANGE: branchlength += GET2(cc,1); cc += 1 + 2 * IMM2_SIZE; break; @@ -371,7 +380,38 @@ for (;;) matches an empty string (by default it causes a matching failure), so in that case we must set the minimum length to zero. */ - case OP_REF: + case OP_DNREF: /* Duplicate named pattern back reference */ + case OP_DNREFI: + if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) + { + int count = GET2(cc, 1+IMM2_SIZE); + pcre_uchar *slot = (pcre_uchar *)re + + re->name_table_offset + GET2(cc, 1) * re->name_entry_size; + d = INT_MAX; + while (count-- > 0) + { + ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); + if (cs == NULL) return -2; + do ce += GET(ce, 1); while (*ce == OP_ALT); + if (cc > cs && cc < ce) + { + d = 0; + had_recurse = TRUE; + break; + } + else + { + int dd = find_minlength(re, cs, startcode, options, recurse_depth); + if (dd < d) d = dd; + } + slot += re->name_entry_size; + } + } + else d = 0; + cc += 1 + 2*IMM2_SIZE; + goto REPEAT_BACK_REFERENCE; + + case OP_REF: /* Single back reference */ case OP_REFI: if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) { @@ -385,7 +425,7 @@ for (;;) } else { - d = find_minlength(cs, startcode, options, recurse_depth); + d = find_minlength(re, cs, startcode, options, recurse_depth); } } else d = 0; @@ -393,24 +433,29 @@ for (;;) /* Handle repeated back references */ + REPEAT_BACK_REFERENCE: switch (*cc) { case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRQUERY: case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSQUERY: min = 0; cc++; break; case OP_CRPLUS: case OP_CRMINPLUS: + case OP_CRPOSPLUS: min = 1; cc++; break; case OP_CRRANGE: case OP_CRMINRANGE: + case OP_CRPOSRANGE: min = GET2(cc, 1); cc += 1 + 2 * IMM2_SIZE; break; @@ -433,7 +478,8 @@ for (;;) had_recurse = TRUE; else { - branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); + branchlength += find_minlength(re, cs, startcode, options, + recurse_depth + 1); } cc += 1 + LINK_SIZE; break; @@ -538,7 +584,7 @@ Arguments: p points to the character caseless the caseless flag cd the block with char table pointers - utf TRUE for UTF-8 / UTF-16 mode + utf TRUE for UTF-8 / UTF-16 / UTF-32 mode Returns: pointer after the character */ @@ -547,7 +593,7 @@ static const pcre_uchar * set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, compile_data *cd, BOOL utf) { -unsigned int c = *p; +pcre_uint32 c = *p; #ifdef COMPILE_PCRE8 SET_BIT(c); @@ -564,18 +610,20 @@ if (utf && c > 127) (void)PRIV(ord2utf)(c, buff); SET_BIT(buff[0]); } -#endif +#endif /* Not SUPPORT_UCP */ return p; } -#endif +#else /* Not SUPPORT_UTF */ +(void)(utf); /* Stops warning for unused parameter */ +#endif /* SUPPORT_UTF */ /* Not UTF-8 mode, or character is less than 127. */ if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); return p + 1; -#endif +#endif /* COMPILE_PCRE8 */ -#ifdef COMPILE_PCRE16 +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 if (c > 0xff) { c = 0xff; @@ -595,10 +643,12 @@ if (utf && c > 127) c = 0xff; SET_BIT(c); } -#endif +#endif /* SUPPORT_UCP */ return p; } -#endif +#else /* Not SUPPORT_UTF */ +(void)(utf); /* Stops warning for unused parameter */ +#endif /* SUPPORT_UTF */ if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); return p + 1; @@ -628,10 +678,10 @@ Returns: nothing */ static void -set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, +set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, compile_data *cd) { -register int c; +register pcre_uint32 c; for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (table_limit == 32) return; @@ -670,10 +720,10 @@ Returns: nothing */ static void -set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, +set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, compile_data *cd) { -register int c; +register pcre_uint32 c; for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; @@ -697,7 +747,7 @@ function fails unless the result is SSB_DONE. Arguments: code points to an expression start_bits points to a 32-byte table, initialized to 0 - utf TRUE if in UTF-8 / UTF-16 mode + utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode cd the block with char table pointers Returns: SSB_FAIL => Failed to find any starting bytes @@ -710,7 +760,7 @@ static int set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, compile_data *cd) { -register int c; +register pcre_uint32 c; int yield = SSB_DONE; #if defined SUPPORT_UTF && defined COMPILE_PCRE8 int table_limit = utf? 16:32; @@ -770,6 +820,10 @@ do case OP_COND: case OP_CREF: case OP_DEF: + case OP_DNCREF: + case OP_DNREF: + case OP_DNREFI: + case OP_DNRREF: case OP_DOLL: case OP_DOLLM: case OP_END: @@ -778,7 +832,6 @@ do case OP_EXTUNI: case OP_FAIL: case OP_MARK: - case OP_NCREF: case OP_NOT: case OP_NOTEXACT: case OP_NOTEXACTI: @@ -810,7 +863,6 @@ do case OP_NOTUPTOI: case OP_NOT_HSPACE: case OP_NOT_VSPACE: - case OP_NRREF: case OP_PROP: case OP_PRUNE: case OP_PRUNE_ARG: @@ -986,8 +1038,8 @@ do identical. */ case OP_HSPACE: - SET_BIT(0x09); - SET_BIT(0x20); + SET_BIT(CHAR_HT); + SET_BIT(CHAR_SPACE); #ifdef SUPPORT_UTF if (utf) { @@ -996,46 +1048,46 @@ do SET_BIT(0xE1); /* For U+1680, U+180E */ SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ SET_BIT(0xE3); /* For U+3000 */ -#endif -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 SET_BIT(0xA0); SET_BIT(0xFF); /* For characters > 255 */ -#endif +#endif /* COMPILE_PCRE[8|16|32] */ } else #endif /* SUPPORT_UTF */ { +#ifndef EBCDIC SET_BIT(0xA0); -#ifdef COMPILE_PCRE16 +#endif /* Not EBCDIC */ +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 SET_BIT(0xFF); /* For characters > 255 */ -#endif +#endif /* COMPILE_PCRE[16|32] */ } try_next = FALSE; break; case OP_ANYNL: case OP_VSPACE: - SET_BIT(0x0A); - SET_BIT(0x0B); - SET_BIT(0x0C); - SET_BIT(0x0D); + SET_BIT(CHAR_LF); + SET_BIT(CHAR_VT); + SET_BIT(CHAR_FF); + SET_BIT(CHAR_CR); #ifdef SUPPORT_UTF if (utf) { #ifdef COMPILE_PCRE8 SET_BIT(0xC2); /* For U+0085 */ SET_BIT(0xE2); /* For U+2028, U+2029 */ -#endif -#ifdef COMPILE_PCRE16 - SET_BIT(0x85); +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + SET_BIT(CHAR_NEL); SET_BIT(0xFF); /* For characters > 255 */ -#endif +#endif /* COMPILE_PCRE[8|16|32] */ } else #endif /* SUPPORT_UTF */ { - SET_BIT(0x85); -#ifdef COMPILE_PCRE16 + SET_BIT(CHAR_NEL); +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 SET_BIT(0xFF); /* For characters > 255 */ #endif } @@ -1058,7 +1110,8 @@ do break; /* The cbit_space table has vertical tab as whitespace; we have to - ensure it is set as not whitespace. */ + ensure it is set as not whitespace. Luckily, the code value is the same + (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */ case OP_NOT_WHITESPACE: set_nottype_bits(start_bits, cbit_space, table_limit, cd); @@ -1066,8 +1119,9 @@ do try_next = FALSE; break; - /* The cbit_space table has vertical tab as whitespace; we have to - not set it from the table. */ + /* The cbit_space table has vertical tab as whitespace; we have to not + set it from the table. Luckily, the code value is the same (0x0b) in + ASCII and EBCDIC, so we can just adjust the appropriate bit. */ case OP_WHITESPACE: c = start_bits[1]; /* Save in case it was already set */ @@ -1121,9 +1175,9 @@ do return SSB_FAIL; case OP_HSPACE: - SET_BIT(0x09); - SET_BIT(0x20); -#ifdef COMPILE_PCRE8 + SET_BIT(CHAR_HT); + SET_BIT(CHAR_SPACE); +#ifdef SUPPORT_UTF if (utf) { #ifdef COMPILE_PCRE8 @@ -1131,38 +1185,38 @@ do SET_BIT(0xE1); /* For U+1680, U+180E */ SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ SET_BIT(0xE3); /* For U+3000 */ -#endif -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 SET_BIT(0xA0); SET_BIT(0xFF); /* For characters > 255 */ -#endif +#endif /* COMPILE_PCRE[8|16|32] */ } else #endif /* SUPPORT_UTF */ +#ifndef EBCDIC SET_BIT(0xA0); +#endif /* Not EBCDIC */ break; case OP_ANYNL: case OP_VSPACE: - SET_BIT(0x0A); - SET_BIT(0x0B); - SET_BIT(0x0C); - SET_BIT(0x0D); -#ifdef COMPILE_PCRE8 + SET_BIT(CHAR_LF); + SET_BIT(CHAR_VT); + SET_BIT(CHAR_FF); + SET_BIT(CHAR_CR); +#ifdef SUPPORT_UTF if (utf) { #ifdef COMPILE_PCRE8 SET_BIT(0xC2); /* For U+0085 */ SET_BIT(0xE2); /* For U+2028, U+2029 */ -#endif -#ifdef COMPILE_PCRE16 - SET_BIT(0x85); +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + SET_BIT(CHAR_NEL); SET_BIT(0xFF); /* For characters > 255 */ -#endif +#endif /* COMPILE_PCRE16 */ } else #endif /* SUPPORT_UTF */ - SET_BIT(0x85); + SET_BIT(CHAR_NEL); break; case OP_NOT_DIGIT: @@ -1173,21 +1227,16 @@ do set_type_bits(start_bits, cbit_digit, table_limit, cd); break; - /* The cbit_space table has vertical tab as whitespace; we have to - ensure it gets set as not whitespace. */ + /* The cbit_space table has vertical tab as whitespace; we no longer + have to play fancy tricks because Perl added VT to its whitespace at + release 5.18. PCRE added it at release 8.34. */ case OP_NOT_WHITESPACE: set_nottype_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] |= 0x08; break; - /* The cbit_space table has vertical tab as whitespace; we have to - avoid setting it. */ - case OP_WHITESPACE: - c = start_bits[1]; /* Save in case it was already set */ set_type_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] = (start_bits[1] & ~0x08) | c; break; case OP_NOT_WORDCHAR: @@ -1216,7 +1265,7 @@ do memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ } #endif -#ifdef COMPILE_PCRE16 +#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 SET_BIT(0xFF); /* For characters > 255 */ #endif /* Fall through */ @@ -1264,11 +1313,14 @@ do case OP_CRMINSTAR: case OP_CRQUERY: case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSQUERY: tcode++; break; case OP_CRRANGE: case OP_CRMINRANGE: + case OP_CRPOSRANGE: if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; else try_next = FALSE; break; @@ -1312,12 +1364,15 @@ Returns: pointer to a pcre[16]_extra block, with st NULL on error or if no optimization possible */ -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION pcre_study(const pcre *external_re, int options, const char **errorptr) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION pcre16_study(const pcre16 *external_re, int options, const char **errorptr) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION +pcre32_study(const pcre32 *external_re, int options, const char **errorptr) #endif { int min; @@ -1330,6 +1385,7 @@ pcre_uchar *code; compile_data compile_block; const REAL_PCRE *re = (const REAL_PCRE *)external_re; + *errorptr = NULL; if (re == NULL || re->magic_number != MAGIC_NUMBER) @@ -1340,10 +1396,12 @@ if (re == NULL || re->magic_number != MAGIC_NUMBER) if ((re->flags & PCRE_MODE) == 0) { -#ifdef COMPILE_PCRE8 - *errorptr = "argument is compiled in 16 bit mode"; -#else - *errorptr = "argument is compiled in 8 bit mode"; +#if defined COMPILE_PCRE8 + *errorptr = "argument not compiled in 8 bit mode"; +#elif defined COMPILE_PCRE16 + *errorptr = "argument not compiled in 16 bit mode"; +#elif defined COMPILE_PCRE32 + *errorptr = "argument not compiled in 32 bit mode"; #endif return NULL; } @@ -1370,14 +1428,18 @@ if ((re->options & PCRE_ANCHORED) == 0 && tables = re->tables; -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 if (tables == NULL) (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, (void *)(&tables)); -#else +#elif defined COMPILE_PCRE16 if (tables == NULL) (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, (void *)(&tables)); +#elif defined COMPILE_PCRE32 + if (tables == NULL) + (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, + (void *)(&tables)); #endif compile_block.lcc = tables + lcc_offset; @@ -1400,7 +1462,7 @@ if ((re->options & PCRE_ANCHORED) == 0 && /* Find the minimum length of subject string. */ -switch(min = find_minlength(code, code, re->options, 0)) +switch(min = find_minlength(re, code, code, re->options, 0)) { case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; case -3: *errorptr = "internal error: opcode not recognized"; return NULL; @@ -1408,19 +1470,20 @@ switch(min = find_minlength(code, code, re->options, 0 } /* If a set of starting bytes has been identified, or if the minimum length is -greater than zero, or if JIT optimization has been requested, get a -pcre[16]_extra block and a pcre_study_data block. The study data is put in the -latter, which is pointed to by the former, which may also get additional data -set later by the calling program. At the moment, the size of pcre_study_data -is fixed. We nevertheless save it in a field for returning via the -pcre_fullinfo() function so that if it becomes variable in the future, -we don't have to change that code. */ +greater than zero, or if JIT optimization has been requested, or if +PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a +pcre_study_data block. The study data is put in the latter, which is pointed to +by the former, which may also get additional data set later by the calling +program. At the moment, the size of pcre_study_data is fixed. We nevertheless +save it in a field for returning via the pcre_fullinfo() function so that if it +becomes variable in the future, we don't have to change that code. */ -if (bits_set || min > 0 +if (bits_set || min > 0 || (options & ( #ifdef SUPPORT_JIT - || (options & PCRE_STUDY_JIT_COMPILE) != 0 + PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE | #endif - ) + PCRE_STUDY_EXTRA_NEEDED)) != 0) { extra = (PUBL(extra) *)(PUBL(malloc)) (sizeof(PUBL(extra)) + sizeof(pcre_study_data)); @@ -1474,18 +1537,27 @@ if (bits_set || min > 0 /* If JIT support was compiled and requested, attempt the JIT compilation. If no starting bytes were found, and the minimum length is zero, and JIT - compilation fails, abandon the extra block and return NULL. */ + compilation fails, abandon the extra block and return NULL, unless + PCRE_STUDY_EXTRA_NEEDED is set. */ #ifdef SUPPORT_JIT extra->executable_jit = NULL; - if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra); - if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) + if ((options & PCRE_STUDY_JIT_COMPILE) != 0) + PRIV(jit_compile)(re, extra, JIT_COMPILE); + if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0) + PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE); + if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0) + PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE); + + if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 && + (options & PCRE_STUDY_EXTRA_NEEDED) == 0) { -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 pcre_free_study(extra); -#endif -#ifdef COMPILE_PCRE16 +#elif defined COMPILE_PCRE16 pcre16_free_study(extra); +#elif defined COMPILE_PCRE32 + pcre32_free_study(extra); #endif extra = NULL; } @@ -1506,12 +1578,15 @@ Argument: a pointer to the pcre[16]_extra block Returns: nothing */ -#ifdef COMPILE_PCRE8 +#if defined COMPILE_PCRE8 PCRE_EXP_DEFN void pcre_free_study(pcre_extra *extra) -#else +#elif defined COMPILE_PCRE16 PCRE_EXP_DEFN void pcre16_free_study(pcre16_extra *extra) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DEFN void +pcre32_free_study(pcre32_extra *extra) #endif { if (extra == NULL)