|
version 1.1.1.3, 2012/10/09 09:19:17
|
version 1.1.1.5, 2014/06/15 19:46:03
|
|
Line 66 string of that length that matches. In UTF8 mode, the
|
Line 66 string of that length that matches. In UTF8 mode, the
|
| rather than bytes. |
rather than bytes. |
| |
|
| Arguments: |
Arguments: |
| |
re compiled pattern block |
| code pointer to start of group (the bracket) |
code pointer to start of group (the bracket) |
| startcode pointer to start of the whole pattern | startcode pointer to start of the whole pattern's code |
| options the compiling options |
options the compiling options |
| int RECURSE depth |
int RECURSE depth |
| |
|
|
Line 78 Returns: the minimum length
|
Line 79 Returns: the minimum length
|
| */ |
*/ |
| |
|
| static int |
static int |
| find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, | find_minlength(const REAL_PCRE *re, const pcre_uchar *code, |
| int recurse_depth) | const pcre_uchar *startcode, int options, int recurse_depth) |
| { |
{ |
| int length = -1; |
int length = -1; |
| /* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
|
Line 98 for (;;)
|
Line 99 for (;;)
|
| { |
{ |
| int d, min; |
int d, min; |
| pcre_uchar *cs, *ce; |
pcre_uchar *cs, *ce; |
| register int op = *cc; | register pcre_uchar op = *cc; |
| |
|
| switch (op) |
switch (op) |
| { |
{ |
|
Line 129 for (;;)
|
Line 130 for (;;)
|
| case OP_SBRAPOS: |
case OP_SBRAPOS: |
| case OP_ONCE: |
case OP_ONCE: |
| case OP_ONCE_NC: |
case OP_ONCE_NC: |
| d = find_minlength(cc, startcode, options, recurse_depth); | d = find_minlength(re, cc, startcode, options, recurse_depth); |
| if (d < 0) return d; |
if (d < 0) return d; |
| branchlength += d; |
branchlength += d; |
| do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
|
Line 175 for (;;)
|
Line 176 for (;;)
|
| |
|
| case OP_REVERSE: |
case OP_REVERSE: |
| case OP_CREF: |
case OP_CREF: |
| case OP_NCREF: | case OP_DNCREF: |
| case OP_RREF: |
case OP_RREF: |
| case OP_NRREF: | case OP_DNRREF: |
| case OP_DEF: |
case OP_DEF: |
| case OP_CALLOUT: |
case OP_CALLOUT: |
| case OP_SOD: |
case OP_SOD: |
|
Line 323 for (;;)
|
Line 324 for (;;)
|
| |
|
| /* Check a class for variable quantification */ |
/* Check a class for variable quantification */ |
| |
|
| #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
|
| case OP_XCLASS: |
|
| cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; |
|
| /* Fall through */ |
|
| #endif |
|
| |
|
| case OP_CLASS: |
case OP_CLASS: |
| case OP_NCLASS: |
case OP_NCLASS: |
| |
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| |
case OP_XCLASS: |
| |
/* The original code caused an unsigned overflow in 64 bit systems, |
| |
so now we use a conditional statement. */ |
| |
if (op == OP_XCLASS) |
| |
cc += GET(cc, 1); |
| |
else |
| |
cc += PRIV(OP_lengths)[OP_CLASS]; |
| |
#else |
| cc += PRIV(OP_lengths)[OP_CLASS]; |
cc += PRIV(OP_lengths)[OP_CLASS]; |
| |
#endif |
| |
|
| switch (*cc) |
switch (*cc) |
| { |
{ |
| case OP_CRPLUS: |
case OP_CRPLUS: |
| case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
| |
case OP_CRPOSPLUS: |
| branchlength++; |
branchlength++; |
| /* Fall through */ |
/* Fall through */ |
| |
|
|
Line 344 for (;;)
|
Line 350 for (;;)
|
| case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
| case OP_CRQUERY: |
case OP_CRQUERY: |
| case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
| |
case OP_CRPOSSTAR: |
| |
case OP_CRPOSQUERY: |
| cc++; |
cc++; |
| break; |
break; |
| |
|
| case OP_CRRANGE: |
case OP_CRRANGE: |
| case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
| |
case OP_CRPOSRANGE: |
| branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
| cc += 1 + 2 * IMM2_SIZE; |
cc += 1 + 2 * IMM2_SIZE; |
| break; |
break; |
|
Line 371 for (;;)
|
Line 380 for (;;)
|
| matches an empty string (by default it causes a matching failure), so in |
matches an empty string (by default it causes a matching failure), so in |
| that case we must set the minimum length to zero. */ |
that case we must set the minimum length to zero. */ |
| |
|
| case OP_REF: | case OP_DNREF: /* Duplicate named pattern back reference */ |
| | case OP_DNREFI: |
| | if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
| | { |
| | int count = GET2(cc, 1+IMM2_SIZE); |
| | pcre_uchar *slot = (pcre_uchar *)re + |
| | re->name_table_offset + GET2(cc, 1) * re->name_entry_size; |
| | d = INT_MAX; |
| | while (count-- > 0) |
| | { |
| | ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); |
| | if (cs == NULL) return -2; |
| | do ce += GET(ce, 1); while (*ce == OP_ALT); |
| | if (cc > cs && cc < ce) |
| | { |
| | d = 0; |
| | had_recurse = TRUE; |
| | break; |
| | } |
| | else |
| | { |
| | int dd = find_minlength(re, cs, startcode, options, recurse_depth); |
| | if (dd < d) d = dd; |
| | } |
| | slot += re->name_entry_size; |
| | } |
| | } |
| | else d = 0; |
| | cc += 1 + 2*IMM2_SIZE; |
| | goto REPEAT_BACK_REFERENCE; |
| | |
| | case OP_REF: /* Single back reference */ |
| case OP_REFI: |
case OP_REFI: |
| if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
| { |
{ |
|
Line 385 for (;;)
|
Line 425 for (;;)
|
| } |
} |
| else |
else |
| { |
{ |
| d = find_minlength(cs, startcode, options, recurse_depth); | d = find_minlength(re, cs, startcode, options, recurse_depth); |
| } |
} |
| } |
} |
| else d = 0; |
else d = 0; |
|
Line 393 for (;;)
|
Line 433 for (;;)
|
| |
|
| /* Handle repeated back references */ |
/* Handle repeated back references */ |
| |
|
| |
REPEAT_BACK_REFERENCE: |
| switch (*cc) |
switch (*cc) |
| { |
{ |
| case OP_CRSTAR: |
case OP_CRSTAR: |
| case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
| case OP_CRQUERY: |
case OP_CRQUERY: |
| case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
| |
case OP_CRPOSSTAR: |
| |
case OP_CRPOSQUERY: |
| min = 0; |
min = 0; |
| cc++; |
cc++; |
| break; |
break; |
| |
|
| case OP_CRPLUS: |
case OP_CRPLUS: |
| case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
| |
case OP_CRPOSPLUS: |
| min = 1; |
min = 1; |
| cc++; |
cc++; |
| break; |
break; |
| |
|
| case OP_CRRANGE: |
case OP_CRRANGE: |
| case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
| |
case OP_CRPOSRANGE: |
| min = GET2(cc, 1); |
min = GET2(cc, 1); |
| cc += 1 + 2 * IMM2_SIZE; |
cc += 1 + 2 * IMM2_SIZE; |
| break; |
break; |
|
Line 433 for (;;)
|
Line 478 for (;;)
|
| had_recurse = TRUE; |
had_recurse = TRUE; |
| else |
else |
| { |
{ |
| branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); | branchlength += find_minlength(re, cs, startcode, options, |
| | recurse_depth + 1); |
| } |
} |
| cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| break; |
break; |
|
Line 538 Arguments:
|
Line 584 Arguments:
|
| p points to the character |
p points to the character |
| caseless the caseless flag |
caseless the caseless flag |
| cd the block with char table pointers |
cd the block with char table pointers |
| utf TRUE for UTF-8 / UTF-16 mode | utf TRUE for UTF-8 / UTF-16 / UTF-32 mode |
| |
|
| Returns: pointer after the character |
Returns: pointer after the character |
| */ |
*/ |
|
Line 547 static const pcre_uchar *
|
Line 593 static const pcre_uchar *
|
| set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, |
set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, |
| compile_data *cd, BOOL utf) |
compile_data *cd, BOOL utf) |
| { |
{ |
| unsigned int c = *p; | pcre_uint32 c = *p; |
| |
|
| #ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
| SET_BIT(c); |
SET_BIT(c); |
|
Line 564 if (utf && c > 127)
|
Line 610 if (utf && c > 127)
|
| (void)PRIV(ord2utf)(c, buff); |
(void)PRIV(ord2utf)(c, buff); |
| SET_BIT(buff[0]); |
SET_BIT(buff[0]); |
| } |
} |
| #endif | #endif /* Not SUPPORT_UCP */ |
| return p; |
return p; |
| } |
} |
| #endif | #else /* Not SUPPORT_UTF */ |
| | (void)(utf); /* Stops warning for unused parameter */ |
| | #endif /* SUPPORT_UTF */ |
| |
|
| /* Not UTF-8 mode, or character is less than 127. */ |
/* Not UTF-8 mode, or character is less than 127. */ |
| |
|
| if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
| return p + 1; |
return p + 1; |
| #endif | #endif /* COMPILE_PCRE8 */ |
| |
|
| #ifdef COMPILE_PCRE16 | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| if (c > 0xff) |
if (c > 0xff) |
| { |
{ |
| c = 0xff; |
c = 0xff; |
|
Line 595 if (utf && c > 127)
|
Line 643 if (utf && c > 127)
|
| c = 0xff; |
c = 0xff; |
| SET_BIT(c); |
SET_BIT(c); |
| } |
} |
| #endif | #endif /* SUPPORT_UCP */ |
| return p; |
return p; |
| } |
} |
| #endif | #else /* Not SUPPORT_UTF */ |
| | (void)(utf); /* Stops warning for unused parameter */ |
| | #endif /* SUPPORT_UTF */ |
| |
|
| if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
| return p + 1; |
return p + 1; |
|
Line 628 Returns: nothing
|
Line 678 Returns: nothing
|
| */ |
*/ |
| |
|
| static void |
static void |
| set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, | set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, |
| compile_data *cd) |
compile_data *cd) |
| { |
{ |
| register int c; | register pcre_uint32 c; |
| for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| if (table_limit == 32) return; |
if (table_limit == 32) return; |
|
Line 670 Returns: nothing
|
Line 720 Returns: nothing
|
| */ |
*/ |
| |
|
| static void |
static void |
| set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, | set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit, |
| compile_data *cd) |
compile_data *cd) |
| { |
{ |
| register int c; | register pcre_uint32 c; |
| for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
|
Line 697 function fails unless the result is SSB_DONE.
|
Line 747 function fails unless the result is SSB_DONE.
|
| Arguments: |
Arguments: |
| code points to an expression |
code points to an expression |
| start_bits points to a 32-byte table, initialized to 0 |
start_bits points to a 32-byte table, initialized to 0 |
| utf TRUE if in UTF-8 / UTF-16 mode | utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode |
| cd the block with char table pointers |
cd the block with char table pointers |
| |
|
| Returns: SSB_FAIL => Failed to find any starting bytes |
Returns: SSB_FAIL => Failed to find any starting bytes |
|
Line 710 static int
|
Line 760 static int
|
| set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, |
set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, |
| compile_data *cd) |
compile_data *cd) |
| { |
{ |
| register int c; | register pcre_uint32 c; |
| int yield = SSB_DONE; |
int yield = SSB_DONE; |
| #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| int table_limit = utf? 16:32; |
int table_limit = utf? 16:32; |
|
Line 770 do
|
Line 820 do
|
| case OP_COND: |
case OP_COND: |
| case OP_CREF: |
case OP_CREF: |
| case OP_DEF: |
case OP_DEF: |
| |
case OP_DNCREF: |
| |
case OP_DNREF: |
| |
case OP_DNREFI: |
| |
case OP_DNRREF: |
| case OP_DOLL: |
case OP_DOLL: |
| case OP_DOLLM: |
case OP_DOLLM: |
| case OP_END: |
case OP_END: |
|
Line 778 do
|
Line 832 do
|
| case OP_EXTUNI: |
case OP_EXTUNI: |
| case OP_FAIL: |
case OP_FAIL: |
| case OP_MARK: |
case OP_MARK: |
| case OP_NCREF: |
|
| case OP_NOT: |
case OP_NOT: |
| case OP_NOTEXACT: |
case OP_NOTEXACT: |
| case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
|
Line 810 do
|
Line 863 do
|
| case OP_NOTUPTOI: |
case OP_NOTUPTOI: |
| case OP_NOT_HSPACE: |
case OP_NOT_HSPACE: |
| case OP_NOT_VSPACE: |
case OP_NOT_VSPACE: |
| case OP_NRREF: |
|
| case OP_PROP: |
case OP_PROP: |
| case OP_PRUNE: |
case OP_PRUNE: |
| case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
|
Line 986 do
|
Line 1038 do
|
| identical. */ |
identical. */ |
| |
|
| case OP_HSPACE: |
case OP_HSPACE: |
| SET_BIT(0x09); | SET_BIT(CHAR_HT); |
| SET_BIT(0x20); | SET_BIT(CHAR_SPACE); |
| #ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
| if (utf) |
if (utf) |
| { |
{ |
|
Line 996 do
|
Line 1048 do
|
| SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| #endif | #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| #ifdef COMPILE_PCRE16 | |
| SET_BIT(0xA0); |
SET_BIT(0xA0); |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif | #endif /* COMPILE_PCRE[8|16|32] */ |
| } |
} |
| else |
else |
| #endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
| { |
{ |
| |
#ifndef EBCDIC |
| SET_BIT(0xA0); |
SET_BIT(0xA0); |
| #ifdef COMPILE_PCRE16 | #endif /* Not EBCDIC */ |
| | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif | #endif /* COMPILE_PCRE[16|32] */ |
| } |
} |
| try_next = FALSE; |
try_next = FALSE; |
| break; |
break; |
| |
|
| case OP_ANYNL: |
case OP_ANYNL: |
| case OP_VSPACE: |
case OP_VSPACE: |
| SET_BIT(0x0A); | SET_BIT(CHAR_LF); |
| SET_BIT(0x0B); | SET_BIT(CHAR_VT); |
| SET_BIT(0x0C); | SET_BIT(CHAR_FF); |
| SET_BIT(0x0D); | SET_BIT(CHAR_CR); |
| #ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
| if (utf) |
if (utf) |
| { |
{ |
| #ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
| SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
| SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| #endif | #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| #ifdef COMPILE_PCRE16 | SET_BIT(CHAR_NEL); |
| SET_BIT(0x85); | |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif | #endif /* COMPILE_PCRE[8|16|32] */ |
| } |
} |
| else |
else |
| #endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
| { |
{ |
| SET_BIT(0x85); | SET_BIT(CHAR_NEL); |
| #ifdef COMPILE_PCRE16 | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif |
#endif |
| } |
} |
|
Line 1058 do
|
Line 1110 do
|
| break; |
break; |
| |
|
| /* The cbit_space table has vertical tab as whitespace; we have to |
/* The cbit_space table has vertical tab as whitespace; we have to |
| ensure it is set as not whitespace. */ | ensure it is set as not whitespace. Luckily, the code value is the same |
| | (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */ |
| |
|
| case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
|
Line 1066 do
|
Line 1119 do
|
| try_next = FALSE; |
try_next = FALSE; |
| break; |
break; |
| |
|
| /* The cbit_space table has vertical tab as whitespace; we have to | /* The cbit_space table has vertical tab as whitespace; we have to not |
| not set it from the table. */ | set it from the table. Luckily, the code value is the same (0x0b) in |
| | ASCII and EBCDIC, so we can just adjust the appropriate bit. */ |
| |
|
| case OP_WHITESPACE: |
case OP_WHITESPACE: |
| c = start_bits[1]; /* Save in case it was already set */ |
c = start_bits[1]; /* Save in case it was already set */ |
|
Line 1121 do
|
Line 1175 do
|
| return SSB_FAIL; |
return SSB_FAIL; |
| |
|
| case OP_HSPACE: |
case OP_HSPACE: |
| SET_BIT(0x09); | SET_BIT(CHAR_HT); |
| SET_BIT(0x20); | SET_BIT(CHAR_SPACE); |
| #ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
| if (utf) |
if (utf) |
| { |
{ |
|
Line 1131 do
|
Line 1185 do
|
| SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| #endif | #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| #ifdef COMPILE_PCRE16 | |
| SET_BIT(0xA0); |
SET_BIT(0xA0); |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif | #endif /* COMPILE_PCRE[8|16|32] */ |
| } |
} |
| else |
else |
| #endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
| |
#ifndef EBCDIC |
| SET_BIT(0xA0); |
SET_BIT(0xA0); |
| |
#endif /* Not EBCDIC */ |
| break; |
break; |
| |
|
| case OP_ANYNL: |
case OP_ANYNL: |
| case OP_VSPACE: |
case OP_VSPACE: |
| SET_BIT(0x0A); | SET_BIT(CHAR_LF); |
| SET_BIT(0x0B); | SET_BIT(CHAR_VT); |
| SET_BIT(0x0C); | SET_BIT(CHAR_FF); |
| SET_BIT(0x0D); | SET_BIT(CHAR_CR); |
| #ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
| if (utf) |
if (utf) |
| { |
{ |
| #ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
| SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
| SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| #endif | #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| #ifdef COMPILE_PCRE16 | SET_BIT(CHAR_NEL); |
| SET_BIT(0x85); | |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif | #endif /* COMPILE_PCRE16 */ |
| } |
} |
| else |
else |
| #endif /* SUPPORT_UTF */ |
#endif /* SUPPORT_UTF */ |
| SET_BIT(0x85); | SET_BIT(CHAR_NEL); |
| break; |
break; |
| |
|
| case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
|
Line 1173 do
|
Line 1227 do
|
| set_type_bits(start_bits, cbit_digit, table_limit, cd); |
set_type_bits(start_bits, cbit_digit, table_limit, cd); |
| break; |
break; |
| |
|
| /* The cbit_space table has vertical tab as whitespace; we have to | /* The cbit_space table has vertical tab as whitespace; we no longer |
| ensure it gets set as not whitespace. */ | have to play fancy tricks because Perl added VT to its whitespace at |
| | release 5.18. PCRE added it at release 8.34. */ |
| |
|
| case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
| start_bits[1] |= 0x08; |
|
| break; |
break; |
| |
|
| /* The cbit_space table has vertical tab as whitespace; we have to |
|
| avoid setting it. */ |
|
| |
|
| case OP_WHITESPACE: |
case OP_WHITESPACE: |
| c = start_bits[1]; /* Save in case it was already set */ |
|
| set_type_bits(start_bits, cbit_space, table_limit, cd); |
set_type_bits(start_bits, cbit_space, table_limit, cd); |
| start_bits[1] = (start_bits[1] & ~0x08) | c; |
|
| break; |
break; |
| |
|
| case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
|
Line 1216 do
|
Line 1265 do
|
| memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ |
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ |
| } |
} |
| #endif |
#endif |
| #ifdef COMPILE_PCRE16 | #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
| SET_BIT(0xFF); /* For characters > 255 */ |
SET_BIT(0xFF); /* For characters > 255 */ |
| #endif |
#endif |
| /* Fall through */ |
/* Fall through */ |
|
Line 1264 do
|
Line 1313 do
|
| case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
| case OP_CRQUERY: |
case OP_CRQUERY: |
| case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
| |
case OP_CRPOSSTAR: |
| |
case OP_CRPOSQUERY: |
| tcode++; |
tcode++; |
| break; |
break; |
| |
|
| case OP_CRRANGE: |
case OP_CRRANGE: |
| case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
| |
case OP_CRPOSRANGE: |
| if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; |
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; |
| else try_next = FALSE; |
else try_next = FALSE; |
| break; |
break; |
|
Line 1312 Returns: pointer to a pcre[16]_extra block, with st
|
Line 1364 Returns: pointer to a pcre[16]_extra block, with st
|
| NULL on error or if no optimization possible |
NULL on error or if no optimization possible |
| */ |
*/ |
| |
|
| #ifdef COMPILE_PCRE8 | #if defined COMPILE_PCRE8 |
| PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION |
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION |
| pcre_study(const pcre *external_re, int options, const char **errorptr) |
pcre_study(const pcre *external_re, int options, const char **errorptr) |
| #else | #elif defined COMPILE_PCRE16 |
| PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION |
PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION |
| pcre16_study(const pcre16 *external_re, int options, const char **errorptr) |
pcre16_study(const pcre16 *external_re, int options, const char **errorptr) |
| |
#elif defined COMPILE_PCRE32 |
| |
PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION |
| |
pcre32_study(const pcre32 *external_re, int options, const char **errorptr) |
| #endif |
#endif |
| { |
{ |
| int min; |
int min; |
|
Line 1330 pcre_uchar *code;
|
Line 1385 pcre_uchar *code;
|
| compile_data compile_block; |
compile_data compile_block; |
| const REAL_PCRE *re = (const REAL_PCRE *)external_re; |
const REAL_PCRE *re = (const REAL_PCRE *)external_re; |
| |
|
| |
|
| *errorptr = NULL; |
*errorptr = NULL; |
| |
|
| if (re == NULL || re->magic_number != MAGIC_NUMBER) |
if (re == NULL || re->magic_number != MAGIC_NUMBER) |
|
Line 1340 if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
Line 1396 if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
| |
|
| if ((re->flags & PCRE_MODE) == 0) |
if ((re->flags & PCRE_MODE) == 0) |
| { |
{ |
| #ifdef COMPILE_PCRE8 | #if defined COMPILE_PCRE8 |
| *errorptr = "argument is compiled in 16 bit mode"; | *errorptr = "argument not compiled in 8 bit mode"; |
| #else | #elif defined COMPILE_PCRE16 |
| *errorptr = "argument is compiled in 8 bit mode"; | *errorptr = "argument not compiled in 16 bit mode"; |
| | #elif defined COMPILE_PCRE32 |
| | *errorptr = "argument not compiled in 32 bit mode"; |
| #endif |
#endif |
| return NULL; |
return NULL; |
| } |
} |
|
Line 1370 if ((re->options & PCRE_ANCHORED) == 0 &&
|
Line 1428 if ((re->options & PCRE_ANCHORED) == 0 &&
|
| |
|
| tables = re->tables; |
tables = re->tables; |
| |
|
| #ifdef COMPILE_PCRE8 | #if defined COMPILE_PCRE8 |
| if (tables == NULL) |
if (tables == NULL) |
| (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
| (void *)(&tables)); |
(void *)(&tables)); |
| #else | #elif defined COMPILE_PCRE16 |
| if (tables == NULL) |
if (tables == NULL) |
| (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
(void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
| (void *)(&tables)); |
(void *)(&tables)); |
| |
#elif defined COMPILE_PCRE32 |
| |
if (tables == NULL) |
| |
(void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
| |
(void *)(&tables)); |
| #endif |
#endif |
| |
|
| compile_block.lcc = tables + lcc_offset; |
compile_block.lcc = tables + lcc_offset; |
|
Line 1400 if ((re->options & PCRE_ANCHORED) == 0 &&
|
Line 1462 if ((re->options & PCRE_ANCHORED) == 0 &&
|
| |
|
| /* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
| |
|
| switch(min = find_minlength(code, code, re->options, 0)) | switch(min = find_minlength(re, code, code, re->options, 0)) |
| { |
{ |
| case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
| case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |
|
Line 1408 switch(min = find_minlength(code, code, re->options, 0
|
Line 1470 switch(min = find_minlength(code, code, re->options, 0
|
| } |
} |
| |
|
| /* If a set of starting bytes has been identified, or if the minimum length is |
/* If a set of starting bytes has been identified, or if the minimum length is |
| greater than zero, or if JIT optimization has been requested, get a | greater than zero, or if JIT optimization has been requested, or if |
| pcre[16]_extra block and a pcre_study_data block. The study data is put in the | PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a |
| latter, which is pointed to by the former, which may also get additional data | pcre_study_data block. The study data is put in the latter, which is pointed to |
| set later by the calling program. At the moment, the size of pcre_study_data | by the former, which may also get additional data set later by the calling |
| is fixed. We nevertheless save it in a field for returning via the | program. At the moment, the size of pcre_study_data is fixed. We nevertheless |
| pcre_fullinfo() function so that if it becomes variable in the future, | save it in a field for returning via the pcre_fullinfo() function so that if it |
| we don't have to change that code. */ | becomes variable in the future, we don't have to change that code. */ |
| |
|
| if (bits_set || min > 0 | if (bits_set || min > 0 || (options & ( |
| #ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | |
| | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0 | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE | |
| #endif |
#endif |
| ) | PCRE_STUDY_EXTRA_NEEDED)) != 0) |
| { |
{ |
| extra = (PUBL(extra) *)(PUBL(malloc)) |
extra = (PUBL(extra) *)(PUBL(malloc)) |
| (sizeof(PUBL(extra)) + sizeof(pcre_study_data)); |
(sizeof(PUBL(extra)) + sizeof(pcre_study_data)); |
|
Line 1475 if (bits_set || min > 0
|
Line 1537 if (bits_set || min > 0
|
| |
|
| /* If JIT support was compiled and requested, attempt the JIT compilation. |
/* If JIT support was compiled and requested, attempt the JIT compilation. |
| If no starting bytes were found, and the minimum length is zero, and JIT |
If no starting bytes were found, and the minimum length is zero, and JIT |
| compilation fails, abandon the extra block and return NULL. */ | compilation fails, abandon the extra block and return NULL, unless |
| | PCRE_STUDY_EXTRA_NEEDED is set. */ |
| |
|
| #ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| extra->executable_jit = NULL; |
extra->executable_jit = NULL; |
|
Line 1486 if (bits_set || min > 0
|
Line 1549 if (bits_set || min > 0
|
| if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0) |
if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0) |
| PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE); |
PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE); |
| |
|
| if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) | if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 && |
| | (options & PCRE_STUDY_EXTRA_NEEDED) == 0) |
| { |
{ |
| #ifdef COMPILE_PCRE8 | #if defined COMPILE_PCRE8 |
| pcre_free_study(extra); |
pcre_free_study(extra); |
| #endif | #elif defined COMPILE_PCRE16 |
| #ifdef COMPILE_PCRE16 | |
| pcre16_free_study(extra); |
pcre16_free_study(extra); |
| |
#elif defined COMPILE_PCRE32 |
| |
pcre32_free_study(extra); |
| #endif |
#endif |
| extra = NULL; |
extra = NULL; |
| } |
} |
|
Line 1513 Argument: a pointer to the pcre[16]_extra block
|
Line 1578 Argument: a pointer to the pcre[16]_extra block
|
| Returns: nothing |
Returns: nothing |
| */ |
*/ |
| |
|
| #ifdef COMPILE_PCRE8 | #if defined COMPILE_PCRE8 |
| PCRE_EXP_DEFN void |
PCRE_EXP_DEFN void |
| pcre_free_study(pcre_extra *extra) |
pcre_free_study(pcre_extra *extra) |
| #else | #elif defined COMPILE_PCRE16 |
| PCRE_EXP_DEFN void |
PCRE_EXP_DEFN void |
| pcre16_free_study(pcre16_extra *extra) |
pcre16_free_study(pcre16_extra *extra) |
| |
#elif defined COMPILE_PCRE32 |
| |
PCRE_EXP_DEFN void |
| |
pcre32_free_study(pcre32_extra *extra) |
| #endif |
#endif |
| { |
{ |
| if (extra == NULL) |
if (extra == NULL) |