Diff for /embedaddon/pcre/pcre_study.c between versions 1.1.1.4 and 1.1.1.5

version 1.1.1.4, 2013/07/22 08:25:55 | version 1.1.1.5, 2014/06/15 19:46:03
Line 66  string of that length that matches. In UTF8 mode, the  Line 66  string of that length that matches. In UTF8 mode, the 
 rather than bytes.  rather than bytes.
   
 Arguments:  Arguments:
     re              compiled pattern block
   code            pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
  startcode       pointer to start of the whole pattern  startcode       pointer to start of the whole pattern's code
   options         the compiling options    options         the compiling options
   int             RECURSE depth    int             RECURSE depth
   
Line 78  Returns:   the minimum length Line 79  Returns:   the minimum length
 */  */
   
 static int  static int
find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
  int recurse_depth)  const pcre_uchar *startcode, int options, int recurse_depth)
 {  {
 int length = -1;  int length = -1;
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
Line 129  for (;;) Line 130  for (;;)
     case OP_SBRAPOS:      case OP_SBRAPOS:
     case OP_ONCE:      case OP_ONCE:
     case OP_ONCE_NC:      case OP_ONCE_NC:
    d = find_minlength(cc, startcode, options, recurse_depth);    d = find_minlength(re, cc, startcode, options, recurse_depth);
     if (d < 0) return d;      if (d < 0) return d;
     branchlength += d;      branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
Line 175  for (;;) Line 176  for (;;)
   
     case OP_REVERSE:      case OP_REVERSE:
     case OP_CREF:      case OP_CREF:
    case OP_NCREF:    case OP_DNCREF:
     case OP_RREF:      case OP_RREF:
    case OP_NRREF:    case OP_DNRREF:
     case OP_DEF:      case OP_DEF:
     case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:      case OP_SOD:
Line 341  for (;;) Line 342  for (;;)
       {        {
       case OP_CRPLUS:        case OP_CRPLUS:
       case OP_CRMINPLUS:        case OP_CRMINPLUS:
         case OP_CRPOSPLUS:
       branchlength++;        branchlength++;
       /* Fall through */        /* Fall through */
   
Line 348  for (;;) Line 350  for (;;)
       case OP_CRMINSTAR:        case OP_CRMINSTAR:
       case OP_CRQUERY:        case OP_CRQUERY:
       case OP_CRMINQUERY:        case OP_CRMINQUERY:
         case OP_CRPOSSTAR:
         case OP_CRPOSQUERY:
       cc++;        cc++;
       break;        break;
   
       case OP_CRRANGE:        case OP_CRRANGE:
       case OP_CRMINRANGE:        case OP_CRMINRANGE:
         case OP_CRPOSRANGE:
       branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
       cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
       break;        break;
Line 375  for (;;) Line 380  for (;;)
     matches an empty string (by default it causes a matching failure), so in      matches an empty string (by default it causes a matching failure), so in
     that case we must set the minimum length to zero. */      that case we must set the minimum length to zero. */
   
    case OP_REF:    case OP_DNREF:     /* Duplicate named pattern back reference */
     case OP_DNREFI:
     if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
       {
       int count = GET2(cc, 1+IMM2_SIZE);
       pcre_uchar *slot = (pcre_uchar *)re +
         re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
       d = INT_MAX;
       while (count-- > 0)
         {
         ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
         if (cs == NULL) return -2;
         do ce += GET(ce, 1); while (*ce == OP_ALT);
         if (cc > cs && cc < ce)
           {
           d = 0;
           had_recurse = TRUE;
           break;
           }
         else
           {
           int dd = find_minlength(re, cs, startcode, options, recurse_depth);
           if (dd < d) d = dd;
           }
         slot += re->name_entry_size;
         }
       }
     else d = 0;
     cc += 1 + 2*IMM2_SIZE;
     goto REPEAT_BACK_REFERENCE;
 
     case OP_REF:      /* Single back reference */
     case OP_REFI:      case OP_REFI:
     if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
       {        {
Line 389  for (;;) Line 425  for (;;)
         }          }
       else        else
         {          {
        d = find_minlength(cs, startcode, options, recurse_depth);        d = find_minlength(re, cs, startcode, options, recurse_depth);
         }          }
       }        }
     else d = 0;      else d = 0;
Line 397  for (;;) Line 433  for (;;)
   
     /* Handle repeated back references */      /* Handle repeated back references */
   
       REPEAT_BACK_REFERENCE:
     switch (*cc)      switch (*cc)
       {        {
       case OP_CRSTAR:        case OP_CRSTAR:
       case OP_CRMINSTAR:        case OP_CRMINSTAR:
       case OP_CRQUERY:        case OP_CRQUERY:
       case OP_CRMINQUERY:        case OP_CRMINQUERY:
         case OP_CRPOSSTAR:
         case OP_CRPOSQUERY:
       min = 0;        min = 0;
       cc++;        cc++;
       break;        break;
   
       case OP_CRPLUS:        case OP_CRPLUS:
       case OP_CRMINPLUS:        case OP_CRMINPLUS:
         case OP_CRPOSPLUS:
       min = 1;        min = 1;
       cc++;        cc++;
       break;        break;
   
       case OP_CRRANGE:        case OP_CRRANGE:
       case OP_CRMINRANGE:        case OP_CRMINRANGE:
         case OP_CRPOSRANGE:
       min = GET2(cc, 1);        min = GET2(cc, 1);
       cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
       break;        break;
Line 437  for (;;) Line 478  for (;;)
       had_recurse = TRUE;        had_recurse = TRUE;
     else      else
       {        {
      branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);      branchlength += find_minlength(re, cs, startcode, options,
         recurse_depth + 1);
       }        }
     cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
     break;      break;
Line 778  do Line 820  do
       case OP_COND:        case OP_COND:
       case OP_CREF:        case OP_CREF:
       case OP_DEF:        case OP_DEF:
         case OP_DNCREF:
         case OP_DNREF:
         case OP_DNREFI:
         case OP_DNRREF:
       case OP_DOLL:        case OP_DOLL:
       case OP_DOLLM:        case OP_DOLLM:
       case OP_END:        case OP_END:
Line 786  do Line 832  do
       case OP_EXTUNI:        case OP_EXTUNI:
       case OP_FAIL:        case OP_FAIL:
       case OP_MARK:        case OP_MARK:
       case OP_NCREF:  
       case OP_NOT:        case OP_NOT:
       case OP_NOTEXACT:        case OP_NOTEXACT:
       case OP_NOTEXACTI:        case OP_NOTEXACTI:
Line 818  do Line 863  do
       case OP_NOTUPTOI:        case OP_NOTUPTOI:
       case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
       case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
       case OP_NRREF:  
       case OP_PROP:        case OP_PROP:
       case OP_PRUNE:        case OP_PRUNE:
       case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
Line 1183  do Line 1227  do
         set_type_bits(start_bits, cbit_digit, table_limit, cd);          set_type_bits(start_bits, cbit_digit, table_limit, cd);
         break;          break;
   
        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we no longer
        ensure it gets set as not whitespace. Luckily, the code value is the        have to play fancy tricks because Perl added VT to its whitespace at
        same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate        release 5.18. PCRE added it at release 8.34. */
        bit. */ 
   
         case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
         set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] |= 0x08;  
         break;          break;
   
         /* The cbit_space table has vertical tab as whitespace; we have to  
         avoid setting it. Luckily, the code value is the same (0x0b) in ASCII  
         and EBCDIC, so we can just adjust the appropriate bit. */  
   
         case OP_WHITESPACE:          case OP_WHITESPACE:
         c = start_bits[1];    /* Save in case it was already set */  
         set_type_bits(start_bits, cbit_space, table_limit, cd);          set_type_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] = (start_bits[1] & ~0x08) | c;  
         break;          break;
   
         case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
Line 1277  do Line 1313  do
           case OP_CRMINSTAR:            case OP_CRMINSTAR:
           case OP_CRQUERY:            case OP_CRQUERY:
           case OP_CRMINQUERY:            case OP_CRMINQUERY:
             case OP_CRPOSSTAR:
             case OP_CRPOSQUERY:
           tcode++;            tcode++;
           break;            break;
   
           case OP_CRRANGE:            case OP_CRRANGE:
           case OP_CRMINRANGE:            case OP_CRMINRANGE:
             case OP_CRPOSRANGE:
           if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;            if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
             else try_next = FALSE;              else try_next = FALSE;
           break;            break;
Line 1346  pcre_uchar *code; Line 1385  pcre_uchar *code;
 compile_data compile_block;  compile_data compile_block;
 const REAL_PCRE *re = (const REAL_PCRE *)external_re;  const REAL_PCRE *re = (const REAL_PCRE *)external_re;
   
   
 *errorptr = NULL;  *errorptr = NULL;
   
 if (re == NULL || re->magic_number != MAGIC_NUMBER)  if (re == NULL || re->magic_number != MAGIC_NUMBER)
Line 1422  if ((re->options & PCRE_ANCHORED) == 0 && Line 1462  if ((re->options & PCRE_ANCHORED) == 0 &&
   
 /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
   
switch(min = find_minlength(code, code, re->options, 0))switch(min = find_minlength(re, code, code, re->options, 0))
   {    {
   case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
   case -3: *errorptr = "internal error: opcode not recognized"; return NULL;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;

Legend (colour-coded in the original page):
  Removed from v.1.1.1.4 | changed lines | Added in v.1.1.1.5


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>