version 1.1, 2012/02/21 23:05:51
|
version 1.1.1.2, 2012/02/21 23:50:25
|
Line 6
|
Line 6
|
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
|
|
Written by Philip Hazel |
Written by Philip Hazel |
Copyright (c) 1997-2010 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
|
|
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
Line 78 Returns: the minimum length
|
Line 78 Returns: the minimum length
|
*/ |
*/ |
|
|
static int |
static int |
find_minlength(const uschar *code, const uschar *startcode, int options, | find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, |
int recurse_depth) |
int recurse_depth) |
{ |
{ |
int length = -1; |
int length = -1; |
BOOL utf8 = (options & PCRE_UTF8) != 0; | /* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
| BOOL utf = (options & PCRE_UTF8) != 0; |
BOOL had_recurse = FALSE; |
BOOL had_recurse = FALSE; |
register int branchlength = 0; |
register int branchlength = 0; |
register uschar *cc = (uschar *)code + 1 + LINK_SIZE; | register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; |
|
|
if (*code == OP_CBRA || *code == OP_SCBRA || |
if (*code == OP_CBRA || *code == OP_SCBRA || |
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2; | *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; |
|
|
/* Scan along the opcodes for this branch. If we get to the end of the |
/* Scan along the opcodes for this branch. If we get to the end of the |
branch, check the length against that of the other branches. */ |
branch, check the length against that of the other branches. */ |
Line 96 branch, check the length against that of the other bra
|
Line 97 branch, check the length against that of the other bra
|
for (;;) |
for (;;) |
{ |
{ |
int d, min; |
int d, min; |
uschar *cs, *ce; | pcre_uchar *cs, *ce; |
register int op = *cc; |
register int op = *cc; |
|
|
switch (op) |
switch (op) |
Line 189 for (;;)
|
Line 190 for (;;)
|
case OP_DOLLM: |
case OP_DOLLM: |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
cc += _pcre_OP_lengths[*cc]; | cc += PRIV(OP_lengths)[*cc]; |
break; |
break; |
|
|
/* Skip over a subpattern that has a {0} or {0,x} quantifier */ |
/* Skip over a subpattern that has a {0} or {0,x} quantifier */ |
Line 198 for (;;)
|
Line 199 for (;;)
|
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
case OP_BRAPOSZERO: |
case OP_BRAPOSZERO: |
case OP_SKIPZERO: |
case OP_SKIPZERO: |
cc += _pcre_OP_lengths[*cc]; | cc += PRIV(OP_lengths)[*cc]; |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
break; |
break; |
Line 223 for (;;)
|
Line 224 for (;;)
|
case OP_NOTPOSPLUSI: |
case OP_NOTPOSPLUSI: |
branchlength++; |
branchlength++; |
cc += 2; |
cc += 2; |
#ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
#endif |
#endif |
break; |
break; |
|
|
Line 243 for (;;)
|
Line 244 for (;;)
|
case OP_NOTEXACT: |
case OP_NOTEXACT: |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
cc += 4; | cc += 2 + IMM2_SIZE; |
#ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
#endif |
#endif |
break; |
break; |
|
|
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4; | cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP |
| || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); |
break; |
break; |
|
|
/* Handle single-char non-literal matchers */ |
/* Handle single-char non-literal matchers */ |
Line 291 for (;;)
|
Line 293 for (;;)
|
appear, but leave the code, just in case.) */ |
appear, but leave the code, just in case.) */ |
|
|
case OP_ANYBYTE: |
case OP_ANYBYTE: |
#ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
if (utf8) return -1; | if (utf) return -1; |
#endif |
#endif |
branchlength++; |
branchlength++; |
cc++; |
cc++; |
Line 308 for (;;)
|
Line 310 for (;;)
|
case OP_TYPEPOSSTAR: |
case OP_TYPEPOSSTAR: |
case OP_TYPEPOSQUERY: |
case OP_TYPEPOSQUERY: |
if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2; |
if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2; |
cc += _pcre_OP_lengths[op]; | cc += PRIV(OP_lengths)[op]; |
break; |
break; |
|
|
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
case OP_TYPEPOSUPTO: |
case OP_TYPEPOSUPTO: |
if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; | if (cc[1 + IMM2_SIZE] == OP_PROP |
cc += _pcre_OP_lengths[op]; | || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; |
| cc += PRIV(OP_lengths)[op]; |
break; |
break; |
|
|
/* Check a class for variable quantification */ |
/* Check a class for variable quantification */ |
|
|
#ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
case OP_XCLASS: |
case OP_XCLASS: |
cc += GET(cc, 1) - 33; | cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; |
/* Fall through */ |
/* Fall through */ |
#endif |
#endif |
|
|
case OP_CLASS: |
case OP_CLASS: |
case OP_NCLASS: |
case OP_NCLASS: |
cc += 33; | cc += PRIV(OP_lengths)[OP_CLASS]; |
|
|
switch (*cc) |
switch (*cc) |
{ |
{ |
Line 347 for (;;)
|
Line 350 for (;;)
|
case OP_CRRANGE: |
case OP_CRRANGE: |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
cc += 5; | cc += 1 + 2 * IMM2_SIZE; |
break; |
break; |
|
|
default: |
default: |
Line 372 for (;;)
|
Line 375 for (;;)
|
case OP_REFI: |
case OP_REFI: |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
{ |
{ |
ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1)); | ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); |
if (cs == NULL) return -2; |
if (cs == NULL) return -2; |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
if (cc > cs && cc < ce) |
if (cc > cs && cc < ce) |
Line 386 for (;;)
|
Line 389 for (;;)
|
} |
} |
} |
} |
else d = 0; |
else d = 0; |
cc += 3; | cc += 1 + IMM2_SIZE; |
|
|
/* Handle repeated back references */ |
/* Handle repeated back references */ |
|
|
Line 409 for (;;)
|
Line 412 for (;;)
|
case OP_CRRANGE: |
case OP_CRRANGE: |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
min = GET2(cc, 1); |
min = GET2(cc, 1); |
cc += 5; | cc += 1 + 2 * IMM2_SIZE; |
break; |
break; |
|
|
default: |
default: |
Line 424 for (;;)
|
Line 427 for (;;)
|
caught by a recursion depth count. */ |
caught by a recursion depth count. */ |
|
|
case OP_RECURSE: |
case OP_RECURSE: |
cs = ce = (uschar *)startcode + GET(cc, 1); | cs = ce = (pcre_uchar *)startcode + GET(cc, 1); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
had_recurse = TRUE; |
had_recurse = TRUE; |
Line 482 for (;;)
|
Line 485 for (;;)
|
case OP_NOTPOSQUERY: |
case OP_NOTPOSQUERY: |
case OP_NOTPOSQUERYI: |
case OP_NOTPOSQUERYI: |
|
|
cc += _pcre_OP_lengths[op]; | cc += PRIV(OP_lengths)[op]; |
#ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
#endif |
#endif |
break; |
break; |
|
|
Line 494 for (;;)
|
Line 497 for (;;)
|
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
cc += _pcre_OP_lengths[op] + cc[1]; | cc += PRIV(OP_lengths)[op] + cc[1]; |
break; |
break; |
|
|
/* The remaining opcodes are just skipped over. */ |
/* The remaining opcodes are just skipped over. */ |
Line 506 for (;;)
|
Line 509 for (;;)
|
case OP_SET_SOM: |
case OP_SET_SOM: |
case OP_SKIP: |
case OP_SKIP: |
case OP_THEN: |
case OP_THEN: |
cc += _pcre_OP_lengths[op]; | cc += PRIV(OP_lengths)[op]; |
break; |
break; |
|
|
/* This should not occur: we list all opcodes explicitly so that when |
/* This should not occur: we list all opcodes explicitly so that when |
Line 535 Arguments:
|
Line 538 Arguments:
|
p points to the character |
p points to the character |
caseless the caseless flag |
caseless the caseless flag |
cd the block with char table pointers |
cd the block with char table pointers |
utf8 TRUE for UTF-8 mode | utf TRUE for UTF-8 / UTF-16 mode |
|
|
Returns: pointer after the character |
Returns: pointer after the character |
*/ |
*/ |
|
|
static const uschar * | static const pcre_uchar * |
set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless, | set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, |
compile_data *cd, BOOL utf8) | compile_data *cd, BOOL utf) |
{ |
{ |
unsigned int c = *p; |
unsigned int c = *p; |
|
|
|
#ifdef COMPILE_PCRE8 |
SET_BIT(c); |
SET_BIT(c); |
|
|
#ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
if (utf8 && c > 127) | if (utf && c > 127) |
{ |
{ |
GETCHARINC(c, p); |
GETCHARINC(c, p); |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
if (caseless) |
if (caseless) |
{ |
{ |
uschar buff[8]; | pcre_uchar buff[6]; |
c = UCD_OTHERCASE(c); |
c = UCD_OTHERCASE(c); |
(void)_pcre_ord2utf8(c, buff); | (void)PRIV(ord2utf)(c, buff); |
SET_BIT(buff[0]); |
SET_BIT(buff[0]); |
} |
} |
#endif |
#endif |
Line 569 if (utf8 && c > 127)
|
Line 573 if (utf8 && c > 127)
|
|
|
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
return p + 1; |
return p + 1; |
|
#endif |
|
|
|
#ifdef COMPILE_PCRE16 |
|
if (c > 0xff) |
|
{ |
|
c = 0xff; |
|
caseless = FALSE; |
|
} |
|
SET_BIT(c); |
|
|
|
#ifdef SUPPORT_UTF |
|
if (utf && c > 127) |
|
{ |
|
GETCHARINC(c, p); |
|
#ifdef SUPPORT_UCP |
|
if (caseless) |
|
{ |
|
c = UCD_OTHERCASE(c); |
|
if (c > 0xff) |
|
c = 0xff; |
|
SET_BIT(c); |
|
} |
|
#endif |
|
return p; |
|
} |
|
#endif |
|
|
|
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
|
return p + 1; |
|
#endif |
} |
} |
|
|
|
|
Line 594 Returns: nothing
|
Line 628 Returns: nothing
|
*/ |
*/ |
|
|
static void |
static void |
set_type_bits(uschar *start_bits, int cbit_type, int table_limit, | set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, |
compile_data *cd) |
compile_data *cd) |
{ |
{ |
register int c; |
register int c; |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
if (table_limit == 32) return; |
if (table_limit == 32) return; |
for (c = 128; c < 256; c++) |
for (c = 128; c < 256; c++) |
{ |
{ |
if ((cd->cbits[c/8] & (1 << (c&7))) != 0) |
if ((cd->cbits[c/8] & (1 << (c&7))) != 0) |
{ |
{ |
uschar buff[8]; | pcre_uchar buff[6]; |
(void)_pcre_ord2utf8(c, buff); | (void)PRIV(ord2utf)(c, buff); |
SET_BIT(buff[0]); |
SET_BIT(buff[0]); |
} |
} |
} |
} |
|
#endif |
} |
} |
|
|
|
|
Line 634 Returns: nothing
|
Line 670 Returns: nothing
|
*/ |
*/ |
|
|
static void |
static void |
set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit, | set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, |
compile_data *cd) |
compile_data *cd) |
{ |
{ |
register int c; |
register int c; |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
|
#endif |
} |
} |
|
|
|
|
Line 659 function fails unless the result is SSB_DONE.
|
Line 697 function fails unless the result is SSB_DONE.
|
Arguments: |
Arguments: |
code points to an expression |
code points to an expression |
start_bits points to a 32-byte table, initialized to 0 |
start_bits points to a 32-byte table, initialized to 0 |
utf8 TRUE if in UTF-8 mode | utf TRUE if in UTF-8 / UTF-16 mode |
cd the block with char table pointers |
cd the block with char table pointers |
|
|
Returns: SSB_FAIL => Failed to find any starting bytes |
Returns: SSB_FAIL => Failed to find any starting bytes |
Line 669 Returns: SSB_FAIL => Failed to find any star
|
Line 707 Returns: SSB_FAIL => Failed to find any star
|
*/ |
*/ |
|
|
static int |
static int |
set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8, | set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, |
compile_data *cd) |
compile_data *cd) |
{ |
{ |
register int c; |
register int c; |
int yield = SSB_DONE; |
int yield = SSB_DONE; |
int table_limit = utf8? 16:32; | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| int table_limit = utf? 16:32; |
| #else |
| int table_limit = 32; |
| #endif |
|
|
#if 0 |
#if 0 |
/* ========================================================================= */ |
/* ========================================================================= */ |
Line 696 volatile int dummy;
|
Line 738 volatile int dummy;
|
do |
do |
{ |
{ |
BOOL try_next = TRUE; |
BOOL try_next = TRUE; |
const uschar *tcode = code + 1 + LINK_SIZE; | const pcre_uchar *tcode = code + 1 + LINK_SIZE; |
|
|
if (*code == OP_CBRA || *code == OP_SCBRA || |
if (*code == OP_CBRA || *code == OP_SCBRA || |
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2; | *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE; |
|
|
while (try_next) /* Loop for items in this branch */ |
while (try_next) /* Loop for items in this branch */ |
{ |
{ |
Line 785 do
|
Line 827 do
|
case OP_SOM: |
case OP_SOM: |
case OP_THEN: |
case OP_THEN: |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
|
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
case OP_XCLASS: |
case OP_XCLASS: |
|
#endif |
return SSB_FAIL; |
return SSB_FAIL; |
|
|
/* We can ignore word boundary tests. */ |
/* We can ignore word boundary tests. */ |
Line 811 do
|
Line 855 do
|
case OP_ONCE: |
case OP_ONCE: |
case OP_ONCE_NC: |
case OP_ONCE_NC: |
case OP_ASSERT: |
case OP_ASSERT: |
rc = set_start_bits(tcode, start_bits, utf8, cd); | rc = set_start_bits(tcode, start_bits, utf, cd); |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_DONE) try_next = FALSE; else |
if (rc == SSB_DONE) try_next = FALSE; else |
{ |
{ |
Line 858 do
|
Line 902 do
|
case OP_BRAZERO: |
case OP_BRAZERO: |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
case OP_BRAPOSZERO: |
case OP_BRAPOSZERO: |
rc = set_start_bits(++tcode, start_bits, utf8, cd); | rc = set_start_bits(++tcode, start_bits, utf, cd); |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
/* ========================================================================= |
/* ========================================================================= |
See the comment at the head of this function concerning the next line, |
See the comment at the head of this function concerning the next line, |
Line 885 do
|
Line 929 do
|
case OP_QUERY: |
case OP_QUERY: |
case OP_MINQUERY: |
case OP_MINQUERY: |
case OP_POSQUERY: |
case OP_POSQUERY: |
tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); | tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); |
break; |
break; |
|
|
case OP_STARI: |
case OP_STARI: |
Line 894 do
|
Line 938 do
|
case OP_QUERYI: |
case OP_QUERYI: |
case OP_MINQUERYI: |
case OP_MINQUERYI: |
case OP_POSQUERYI: |
case OP_POSQUERYI: |
tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8); | tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); |
break; |
break; |
|
|
/* Single-char upto sets the bit and tries the next */ |
/* Single-char upto sets the bit and tries the next */ |
Line 902 do
|
Line 946 do
|
case OP_UPTO: |
case OP_UPTO: |
case OP_MINUPTO: |
case OP_MINUPTO: |
case OP_POSUPTO: |
case OP_POSUPTO: |
tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8); | tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf); |
break; |
break; |
|
|
case OP_UPTOI: |
case OP_UPTOI: |
case OP_MINUPTOI: |
case OP_MINUPTOI: |
case OP_POSUPTOI: |
case OP_POSUPTOI: |
tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8); | tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf); |
break; |
break; |
|
|
/* At least one single char sets the bit and stops */ |
/* At least one single char sets the bit and stops */ |
|
|
case OP_EXACT: |
case OP_EXACT: |
tcode += 2; | tcode += IMM2_SIZE; |
/* Fall through */ |
/* Fall through */ |
case OP_CHAR: |
case OP_CHAR: |
case OP_PLUS: |
case OP_PLUS: |
case OP_MINPLUS: |
case OP_MINPLUS: |
case OP_POSPLUS: |
case OP_POSPLUS: |
(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); | (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); |
try_next = FALSE; |
try_next = FALSE; |
break; |
break; |
|
|
case OP_EXACTI: |
case OP_EXACTI: |
tcode += 2; | tcode += IMM2_SIZE; |
/* Fall through */ |
/* Fall through */ |
case OP_CHARI: |
case OP_CHARI: |
case OP_PLUSI: |
case OP_PLUSI: |
case OP_MINPLUSI: |
case OP_MINPLUSI: |
case OP_POSPLUSI: |
case OP_POSPLUSI: |
(void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8); | (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); |
try_next = FALSE; |
try_next = FALSE; |
break; |
break; |
|
|
Line 944 do
|
Line 988 do
|
case OP_HSPACE: |
case OP_HSPACE: |
SET_BIT(0x09); |
SET_BIT(0x09); |
SET_BIT(0x20); |
SET_BIT(0x20); |
if (utf8) | #ifdef SUPPORT_UTF |
| if (utf) |
{ |
{ |
|
#ifdef COMPILE_PCRE8 |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
|
#endif |
|
#ifdef COMPILE_PCRE16 |
|
SET_BIT(0xA0); |
|
SET_BIT(0xFF); /* For characters > 255 */ |
|
#endif |
} |
} |
else SET_BIT(0xA0); | else |
| #endif /* SUPPORT_UTF */ |
| { |
| SET_BIT(0xA0); |
| #ifdef COMPILE_PCRE16 |
| SET_BIT(0xFF); /* For characters > 255 */ |
| #endif |
| } |
try_next = FALSE; |
try_next = FALSE; |
break; |
break; |
|
|
Line 961 do
|
Line 1019 do
|
SET_BIT(0x0B); |
SET_BIT(0x0B); |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
if (utf8) | #ifdef SUPPORT_UTF |
| if (utf) |
{ |
{ |
|
#ifdef COMPILE_PCRE8 |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
|
#endif |
|
#ifdef COMPILE_PCRE16 |
|
SET_BIT(0x85); |
|
SET_BIT(0xFF); /* For characters > 255 */ |
|
#endif |
} |
} |
else SET_BIT(0x85); | else |
| #endif /* SUPPORT_UTF */ |
| { |
| SET_BIT(0x85); |
| #ifdef COMPILE_PCRE16 |
| SET_BIT(0xFF); /* For characters > 255 */ |
| #endif |
| } |
try_next = FALSE; |
try_next = FALSE; |
break; |
break; |
|
|
Line 1024 do
|
Line 1096 do
|
break; |
break; |
|
|
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
tcode += 3; | tcode += 1 + IMM2_SIZE; |
break; |
break; |
|
|
/* Zero or more repeats of character types set the bits and then |
/* Zero or more repeats of character types set the bits and then |
Line 1033 do
|
Line 1105 do
|
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
case OP_TYPEPOSUPTO: |
case OP_TYPEPOSUPTO: |
tcode += 2; /* Fall through */ | tcode += IMM2_SIZE; /* Fall through */ |
|
|
case OP_TYPESTAR: |
case OP_TYPESTAR: |
case OP_TYPEMINSTAR: |
case OP_TYPEMINSTAR: |
Line 1051 do
|
Line 1123 do
|
case OP_HSPACE: |
case OP_HSPACE: |
SET_BIT(0x09); |
SET_BIT(0x09); |
SET_BIT(0x20); |
SET_BIT(0x20); |
if (utf8) | #ifdef COMPILE_PCRE8 |
| if (utf) |
{ |
{ |
|
#ifdef COMPILE_PCRE8 |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
|
#endif |
|
#ifdef COMPILE_PCRE16 |
|
SET_BIT(0xA0); |
|
SET_BIT(0xFF); /* For characters > 255 */ |
|
#endif |
} |
} |
else SET_BIT(0xA0); | else |
| #endif /* SUPPORT_UTF */ |
| SET_BIT(0xA0); |
break; |
break; |
|
|
case OP_ANYNL: |
case OP_ANYNL: |
Line 1067 do
|
Line 1148 do
|
SET_BIT(0x0B); |
SET_BIT(0x0B); |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
if (utf8) | #ifdef COMPILE_PCRE8 |
| if (utf) |
{ |
{ |
|
#ifdef COMPILE_PCRE8 |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
|
#endif |
|
#ifdef COMPILE_PCRE16 |
|
SET_BIT(0x85); |
|
SET_BIT(0xFF); /* For characters > 255 */ |
|
#endif |
} |
} |
else SET_BIT(0x85); | else |
| #endif /* SUPPORT_UTF */ |
| SET_BIT(0x85); |
break; |
break; |
|
|
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
Line 1119 do
|
Line 1209 do
|
character with a value > 255. */ |
character with a value > 255. */ |
|
|
case OP_NCLASS: |
case OP_NCLASS: |
#ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
if (utf8) | if (utf) |
{ |
{ |
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ |
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ |
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ |
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ |
} |
} |
#endif |
#endif |
|
#ifdef COMPILE_PCRE16 |
|
SET_BIT(0xFF); /* For characters > 255 */ |
|
#endif |
/* Fall through */ |
/* Fall through */ |
|
|
case OP_CLASS: |
case OP_CLASS: |
{ |
{ |
|
pcre_uint8 *map; |
tcode++; |
tcode++; |
|
map = (pcre_uint8 *)tcode; |
|
|
/* In UTF-8 mode, the bits in a bit map correspond to character |
/* In UTF-8 mode, the bits in a bit map correspond to character |
values, not to byte values. However, the bit map we are constructing is |
values, not to byte values. However, the bit map we are constructing is |
Line 1138 do
|
Line 1233 do
|
value is > 127. In fact, there are only two possible starting bytes for |
value is > 127. In fact, there are only two possible starting bytes for |
characters in the range 128 - 255. */ |
characters in the range 128 - 255. */ |
|
|
#ifdef SUPPORT_UTF8 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
if (utf8) | if (utf) |
{ |
{ |
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; | for (c = 0; c < 16; c++) start_bits[c] |= map[c]; |
for (c = 128; c < 256; c++) |
for (c = 128; c < 256; c++) |
{ |
{ |
if ((tcode[c/8] && (1 << (c&7))) != 0) | if ((map[c/8] && (1 << (c&7))) != 0) |
{ |
{ |
int d = (c >> 6) | 0xc0; /* Set bit for this starter */ |
int d = (c >> 6) | 0xc0; /* Set bit for this starter */ |
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ |
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ |
Line 1152 do
|
Line 1247 do
|
} |
} |
} |
} |
} |
} |
|
|
/* In non-UTF-8 mode, the two bit maps are completely compatible. */ |
|
|
|
else |
else |
#endif |
#endif |
{ |
{ |
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; | /* In non-UTF-8 mode, the two bit maps are completely compatible. */ |
| for (c = 0; c < 32; c++) start_bits[c] |= map[c]; |
} |
} |
|
|
/* Advance past the bit map, and act on what follows. For a zero |
/* Advance past the bit map, and act on what follows. For a zero |
minimum repeat, continue; otherwise stop processing. */ |
minimum repeat, continue; otherwise stop processing. */ |
|
|
tcode += 32; | tcode += 32 / sizeof(pcre_uchar); |
switch (*tcode) |
switch (*tcode) |
{ |
{ |
case OP_CRSTAR: |
case OP_CRSTAR: |
Line 1176 do
|
Line 1269 do
|
|
|
case OP_CRRANGE: |
case OP_CRRANGE: |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; | if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; |
else try_next = FALSE; |
else try_next = FALSE; |
break; |
break; |
|
|
Line 1205 return yield;
|
Line 1298 return yield;
|
*************************************************/ |
*************************************************/ |
|
|
/* This function is handed a compiled expression that it must study to produce |
/* This function is handed a compiled expression that it must study to produce |
information that will speed up the matching. It returns a pcre_extra block | information that will speed up the matching. It returns a pcre[16]_extra block |
which then gets handed back to pcre_exec(). |
which then gets handed back to pcre_exec(). |
|
|
Arguments: |
Arguments: |
Line 1214 Arguments:
|
Line 1307 Arguments:
|
errorptr points to where to place error messages; |
errorptr points to where to place error messages; |
set NULL unless error |
set NULL unless error |
|
|
Returns: pointer to a pcre_extra block, with study_data filled in and the | Returns: pointer to a pcre[16]_extra block, with study_data filled in and |
appropriate flags set; | the appropriate flags set; |
NULL on error or if no optimization possible |
NULL on error or if no optimization possible |
*/ |
*/ |
|
|
|
#ifdef COMPILE_PCRE8 |
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION |
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION |
pcre_study(const pcre *external_re, int options, const char **errorptr) |
pcre_study(const pcre *external_re, int options, const char **errorptr) |
|
#else |
|
PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION |
|
pcre16_study(const pcre16 *external_re, int options, const char **errorptr) |
|
#endif |
{ |
{ |
int min; |
int min; |
BOOL bits_set = FALSE; |
BOOL bits_set = FALSE; |
uschar start_bits[32]; | pcre_uint8 start_bits[32]; |
pcre_extra *extra = NULL; | PUBL(extra) *extra = NULL; |
pcre_study_data *study; |
pcre_study_data *study; |
const uschar *tables; | const pcre_uint8 *tables; |
uschar *code; | pcre_uchar *code; |
compile_data compile_block; |
compile_data compile_block; |
const real_pcre *re = (const real_pcre *)external_re; | const REAL_PCRE *re = (const REAL_PCRE *)external_re; |
|
|
*errorptr = NULL; |
*errorptr = NULL; |
|
|
Line 1240 if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
Line 1338 if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
return NULL; |
return NULL; |
} |
} |
|
|
|
if ((re->flags & PCRE_MODE) == 0) |
|
{ |
|
#ifdef COMPILE_PCRE8 |
|
*errorptr = "argument is compiled in 16 bit mode"; |
|
#else |
|
*errorptr = "argument is compiled in 8 bit mode"; |
|
#endif |
|
return NULL; |
|
} |
|
|
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) |
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) |
{ |
{ |
*errorptr = "unknown or incorrect option bit(s) set"; |
*errorptr = "unknown or incorrect option bit(s) set"; |
return NULL; |
return NULL; |
} |
} |
|
|
code = (uschar *)re + re->name_table_offset + | code = (pcre_uchar *)re + re->name_table_offset + |
(re->name_count * re->name_entry_size); |
(re->name_count * re->name_entry_size); |
|
|
/* For an anchored pattern, or an unanchored pattern that has a first char, or |
/* For an anchored pattern, or an unanchored pattern that has a first char, or |
Line 1261 if ((re->options & PCRE_ANCHORED) == 0 &&
|
Line 1369 if ((re->options & PCRE_ANCHORED) == 0 &&
|
/* Set the character tables in the block that is passed around */ |
/* Set the character tables in the block that is passed around */ |
|
|
tables = re->tables; |
tables = re->tables; |
|
|
|
#ifdef COMPILE_PCRE8 |
if (tables == NULL) |
if (tables == NULL) |
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
(void *)(&tables)); |
(void *)(&tables)); |
|
#else |
|
if (tables == NULL) |
|
(void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
|
(void *)(&tables)); |
|
#endif |
|
|
compile_block.lcc = tables + lcc_offset; |
compile_block.lcc = tables + lcc_offset; |
compile_block.fcc = tables + fcc_offset; |
compile_block.fcc = tables + fcc_offset; |
Line 1272 if ((re->options & PCRE_ANCHORED) == 0 &&
|
Line 1387 if ((re->options & PCRE_ANCHORED) == 0 &&
|
|
|
/* See if we can find a fixed set of initial characters for the pattern. */ |
/* See if we can find a fixed set of initial characters for the pattern. */ |
|
|
memset(start_bits, 0, 32 * sizeof(uschar)); | memset(start_bits, 0, 32 * sizeof(pcre_uint8)); |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
&compile_block); |
&compile_block); |
bits_set = rc == SSB_DONE; |
bits_set = rc == SSB_DONE; |
Line 1293 switch(min = find_minlength(code, code, re->options, 0
|
Line 1408 switch(min = find_minlength(code, code, re->options, 0
|
} |
} |
|
|
/* If a set of starting bytes has been identified, or if the minimum length is |
/* If a set of starting bytes has been identified, or if the minimum length is |
greater than zero, or if JIT optimization has been requested, get a pcre_extra | greater than zero, or if JIT optimization has been requested, get a |
block and a pcre_study_data block. The study data is put in the latter, which | pcre[16]_extra block and a pcre_study_data block. The study data is put in the |
is pointed to by the former, which may also get additional data set later by | latter, which is pointed to by the former, which may also get additional data |
the calling program. At the moment, the size of pcre_study_data is fixed. We | set later by the calling program. At the moment, the size of pcre_study_data |
nevertheless save it in a field for returning via the pcre_fullinfo() function | is fixed. We nevertheless save it in a field for returning via the |
so that if it becomes variable in the future, we don't have to change that | pcre_fullinfo() function so that if it becomes variable in the future, |
code. */ | we don't have to change that code. */ |
|
|
if (bits_set || min > 0 |
if (bits_set || min > 0 |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
Line 1307 if (bits_set || min > 0
|
Line 1422 if (bits_set || min > 0
|
#endif |
#endif |
) |
) |
{ |
{ |
extra = (pcre_extra *)(pcre_malloc) | extra = (PUBL(extra) *)(PUBL(malloc)) |
(sizeof(pcre_extra) + sizeof(pcre_study_data)); | (sizeof(PUBL(extra)) + sizeof(pcre_study_data)); |
if (extra == NULL) |
if (extra == NULL) |
{ |
{ |
*errorptr = "failed to get memory"; |
*errorptr = "failed to get memory"; |
return NULL; |
return NULL; |
} |
} |
|
|
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); | study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra))); |
extra->flags = PCRE_EXTRA_STUDY_DATA; |
extra->flags = PCRE_EXTRA_STUDY_DATA; |
extra->study_data = study; |
extra->study_data = study; |
|
|
Line 1331 if (bits_set || min > 0
|
Line 1446 if (bits_set || min > 0
|
study->flags |= PCRE_STUDY_MAPPED; |
study->flags |= PCRE_STUDY_MAPPED; |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
} |
} |
else memset(study->start_bits, 0, 32 * sizeof(uschar)); | else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8)); |
|
|
|
#ifdef PCRE_DEBUG |
|
if (bits_set) |
|
{ |
|
pcre_uint8 *ptr = start_bits; |
|
int i; |
|
|
|
printf("Start bits:\n"); |
|
for (i = 0; i < 32; i++) |
|
printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n"); |
|
} |
|
#endif |
|
|
/* Always set the minlength value in the block, because the JIT compiler |
/* Always set the minlength value in the block, because the JIT compiler |
makes use of it. However, don't set the bit unless the length is greater than |
makes use of it. However, don't set the bit unless the length is greater than |
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time |
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time |
Line 1351 if (bits_set || min > 0
|
Line 1478 if (bits_set || min > 0
|
|
|
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
extra->executable_jit = NULL; |
extra->executable_jit = NULL; |
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra); | if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra); |
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) |
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) |
{ |
{ |
|
#ifdef COMPILE_PCRE8 |
pcre_free_study(extra); |
pcre_free_study(extra); |
|
#endif |
|
#ifdef COMPILE_PCRE16 |
|
pcre16_free_study(extra); |
|
#endif |
extra = NULL; |
extra = NULL; |
} |
} |
#endif |
#endif |
Line 1370 return extra;
|
Line 1502 return extra;
|
|
|
/* This function frees the memory that was obtained by pcre_study(). |
/* This function frees the memory that was obtained by pcre_study(). |
|
|
Argument: a pointer to the pcre_extra block | Argument: a pointer to the pcre[16]_extra block |
Returns: nothing |
Returns: nothing |
*/ |
*/ |
|
|
|
#ifdef COMPILE_PCRE8 |
PCRE_EXP_DEFN void |
PCRE_EXP_DEFN void |
pcre_free_study(pcre_extra *extra) |
pcre_free_study(pcre_extra *extra) |
|
#else |
|
PCRE_EXP_DEFN void |
|
pcre16_free_study(pcre16_extra *extra) |
|
#endif |
{ |
{ |
|
if (extra == NULL) |
|
return; |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
extra->executable_jit != NULL) |
extra->executable_jit != NULL) |
_pcre_jit_free(extra->executable_jit); | PRIV(jit_free)(extra->executable_jit); |
#endif |
#endif |
pcre_free(extra); | PUBL(free)(extra); |
} |
} |
|
|
/* End of pcre_study.c */ |
/* End of pcre_study.c */ |