version 1.1.1.4, 2013/07/22 08:25:55
|
version 1.1.1.5, 2014/06/15 19:46:03
|
Line 66 string of that length that matches. In UTF8 mode, the result is in characters
|
Line 66 string of that length that matches. In UTF8 mode, the result is in characters
|
rather than bytes. |
rather than bytes. |
|
|
Arguments: |
Arguments: |
|
re compiled pattern block |
code pointer to start of group (the bracket) |
code pointer to start of group (the bracket) |
startcode pointer to start of the whole pattern | startcode pointer to start of the whole pattern's code |
options the compiling options |
options the compiling options |
int RECURSE depth |
int RECURSE depth |
|
|
Line 78 Returns: the minimum length
|
Line 79 Returns: the minimum length
|
*/ |
*/ |
|
|
static int |
static int |
find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, | find_minlength(const REAL_PCRE *re, const pcre_uchar *code, |
int recurse_depth) | const pcre_uchar *startcode, int options, int recurse_depth) |
{ |
{ |
int length = -1; |
int length = -1; |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
Line 129 for (;;)
|
Line 130 for (;;)
|
case OP_SBRAPOS: |
case OP_SBRAPOS: |
case OP_ONCE: |
case OP_ONCE: |
case OP_ONCE_NC: |
case OP_ONCE_NC: |
d = find_minlength(cc, startcode, options, recurse_depth); | d = find_minlength(re, cc, startcode, options, recurse_depth); |
if (d < 0) return d; |
if (d < 0) return d; |
branchlength += d; |
branchlength += d; |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
Line 175 for (;;)
|
Line 176 for (;;)
|
|
|
case OP_REVERSE: |
case OP_REVERSE: |
case OP_CREF: |
case OP_CREF: |
case OP_NCREF: | case OP_DNCREF: |
case OP_RREF: |
case OP_RREF: |
case OP_NRREF: | case OP_DNRREF: |
case OP_DEF: |
case OP_DEF: |
case OP_CALLOUT: |
case OP_CALLOUT: |
case OP_SOD: |
case OP_SOD: |
Line 341 for (;;)
|
Line 342 for (;;)
|
{ |
{ |
case OP_CRPLUS: |
case OP_CRPLUS: |
case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
|
case OP_CRPOSPLUS: |
branchlength++; |
branchlength++; |
/* Fall through */ |
/* Fall through */ |
|
|
Line 348 for (;;)
|
Line 350 for (;;)
|
case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
case OP_CRQUERY: |
case OP_CRQUERY: |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
|
case OP_CRPOSSTAR: |
|
case OP_CRPOSQUERY: |
cc++; |
cc++; |
break; |
break; |
|
|
case OP_CRRANGE: |
case OP_CRRANGE: |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
|
case OP_CRPOSRANGE: |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
cc += 1 + 2 * IMM2_SIZE; |
cc += 1 + 2 * IMM2_SIZE; |
break; |
break; |
Line 375 for (;;)
|
Line 380 for (;;)
|
matches an empty string (by default it causes a matching failure), so in |
matches an empty string (by default it causes a matching failure), so in |
that case we must set the minimum length to zero. */ |
that case we must set the minimum length to zero. */ |
|
|
case OP_REF: | case OP_DNREF: /* Duplicate named pattern back reference */ |
| case OP_DNREFI: |
| if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
| { |
| int count = GET2(cc, 1+IMM2_SIZE); |
| pcre_uchar *slot = (pcre_uchar *)re + |
| re->name_table_offset + GET2(cc, 1) * re->name_entry_size; |
| d = INT_MAX; |
| while (count-- > 0) |
| { |
| ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); |
| if (cs == NULL) return -2; |
| do ce += GET(ce, 1); while (*ce == OP_ALT); |
| if (cc > cs && cc < ce) |
| { |
| d = 0; |
| had_recurse = TRUE; |
| break; |
| } |
| else |
| { |
| int dd = find_minlength(re, cs, startcode, options, recurse_depth); |
| if (dd < d) d = dd; |
| } |
| slot += re->name_entry_size; |
| } |
| } |
| else d = 0; |
| cc += 1 + 2*IMM2_SIZE; |
| goto REPEAT_BACK_REFERENCE; |
| |
| case OP_REF: /* Single back reference */ |
case OP_REFI: |
case OP_REFI: |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
{ |
{ |
Line 389 for (;;)
|
Line 425 for (;;)
|
} |
} |
else |
else |
{ |
{ |
d = find_minlength(cs, startcode, options, recurse_depth); | d = find_minlength(re, cs, startcode, options, recurse_depth); |
} |
} |
} |
} |
else d = 0; |
else d = 0; |
Line 397 for (;;)
|
Line 433 for (;;)
|
|
|
/* Handle repeated back references */ |
/* Handle repeated back references */ |
|
|
|
REPEAT_BACK_REFERENCE: |
switch (*cc) |
switch (*cc) |
{ |
{ |
case OP_CRSTAR: |
case OP_CRSTAR: |
case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
case OP_CRQUERY: |
case OP_CRQUERY: |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
|
case OP_CRPOSSTAR: |
|
case OP_CRPOSQUERY: |
min = 0; |
min = 0; |
cc++; |
cc++; |
break; |
break; |
|
|
case OP_CRPLUS: |
case OP_CRPLUS: |
case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
|
case OP_CRPOSPLUS: |
min = 1; |
min = 1; |
cc++; |
cc++; |
break; |
break; |
|
|
case OP_CRRANGE: |
case OP_CRRANGE: |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
|
case OP_CRPOSRANGE: |
min = GET2(cc, 1); |
min = GET2(cc, 1); |
cc += 1 + 2 * IMM2_SIZE; |
cc += 1 + 2 * IMM2_SIZE; |
break; |
break; |
Line 437 for (;;)
|
Line 478 for (;;)
|
had_recurse = TRUE; |
had_recurse = TRUE; |
else |
else |
{ |
{ |
branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); | branchlength += find_minlength(re, cs, startcode, options, |
| recurse_depth + 1); |
} |
} |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
break; |
break; |
Line 778 do
|
Line 820 do
|
case OP_COND: |
case OP_COND: |
case OP_CREF: |
case OP_CREF: |
case OP_DEF: |
case OP_DEF: |
|
case OP_DNCREF: |
|
case OP_DNREF: |
|
case OP_DNREFI: |
|
case OP_DNRREF: |
case OP_DOLL: |
case OP_DOLL: |
case OP_DOLLM: |
case OP_DOLLM: |
case OP_END: |
case OP_END: |
Line 786 do
|
Line 832 do
|
case OP_EXTUNI: |
case OP_EXTUNI: |
case OP_FAIL: |
case OP_FAIL: |
case OP_MARK: |
case OP_MARK: |
case OP_NCREF: |
|
case OP_NOT: |
case OP_NOT: |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
Line 818 do
|
Line 863 do
|
case OP_NOTUPTOI: |
case OP_NOTUPTOI: |
case OP_NOT_HSPACE: |
case OP_NOT_HSPACE: |
case OP_NOT_VSPACE: |
case OP_NOT_VSPACE: |
case OP_NRREF: |
|
case OP_PROP: |
case OP_PROP: |
case OP_PRUNE: |
case OP_PRUNE: |
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
Line 1183 do
|
Line 1227 do
|
set_type_bits(start_bits, cbit_digit, table_limit, cd); |
set_type_bits(start_bits, cbit_digit, table_limit, cd); |
break; |
break; |
|
|
/* The cbit_space table has vertical tab as whitespace; we have to | /* The cbit_space table has vertical tab as whitespace; we no longer |
ensure it gets set as not whitespace. Luckily, the code value is the | have to play fancy tricks because Perl added VT to its whitespace at |
same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate | release 5.18. PCRE added it at release 8.34. */ |
bit. */ | |
|
|
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
start_bits[1] |= 0x08; |
|
break; |
break; |
|
|
/* The cbit_space table has vertical tab as whitespace; we have to |
|
avoid setting it. Luckily, the code value is the same (0x0b) in ASCII |
|
and EBCDIC, so we can just adjust the appropriate bit. */ |
|
|
|
case OP_WHITESPACE: |
case OP_WHITESPACE: |
c = start_bits[1]; /* Save in case it was already set */ |
|
set_type_bits(start_bits, cbit_space, table_limit, cd); |
set_type_bits(start_bits, cbit_space, table_limit, cd); |
start_bits[1] = (start_bits[1] & ~0x08) | c; |
|
break; |
break; |
|
|
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
Line 1277 do
|
Line 1313 do
|
case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
case OP_CRQUERY: |
case OP_CRQUERY: |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
|
case OP_CRPOSSTAR: |
|
case OP_CRPOSQUERY: |
tcode++; |
tcode++; |
break; |
break; |
|
|
case OP_CRRANGE: |
case OP_CRRANGE: |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
|
case OP_CRPOSRANGE: |
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; |
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; |
else try_next = FALSE; |
else try_next = FALSE; |
break; |
break; |
Line 1346 pcre_uchar *code;
|
Line 1385 pcre_uchar *code;
|
compile_data compile_block; |
compile_data compile_block; |
const REAL_PCRE *re = (const REAL_PCRE *)external_re; |
const REAL_PCRE *re = (const REAL_PCRE *)external_re; |
|
|
|
|
*errorptr = NULL; |
*errorptr = NULL; |
|
|
if (re == NULL || re->magic_number != MAGIC_NUMBER) |
if (re == NULL || re->magic_number != MAGIC_NUMBER) |
Line 1422 if ((re->options & PCRE_ANCHORED) == 0 &&
|
Line 1462 if ((re->options & PCRE_ANCHORED) == 0 &&
|
|
|
/* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
|
|
switch(min = find_minlength(code, code, re->options, 0)) | switch(min = find_minlength(re, code, code, re->options, 0)) |
{ |
{ |
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |