Diff for /embedaddon/pcre/pcre_exec.c between versions 1.1.1.4 and 1.1.1.5

version 1.1.1.4, 2013/07/22 08:25:56 version 1.1.1.5, 2014/06/15 19:46:04
Line 107  because the offset vector is always a multiple of 3 lo Line 107  because the offset vector is always a multiple of 3 lo
   
 /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
   
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
   
 #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
 /*************************************************  /*************************************************
Line 167  match_ref(int offset, register PCRE_PUCHAR eptr, int l Line 167  match_ref(int offset, register PCRE_PUCHAR eptr, int l
 {  {
 PCRE_PUCHAR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
#ifdef SUPPORT_UTF#if defined SUPPORT_UTF && defined SUPPORT_UCP
 BOOL utf = md->utf;  BOOL utf = md->utf;
 #endif  #endif
   
Line 195  ASCII characters. */ Line 195  ASCII characters. */
   
 if (caseless)  if (caseless)
   {    {
#ifdef SUPPORT_UTF#if defined SUPPORT_UTF && defined SUPPORT_UCP
#ifdef SUPPORT_UCP 
   if (utf)    if (utf)
     {      {
     /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
Line 230  if (caseless) Line 229  if (caseless)
     }      }
   else    else
 #endif  #endif
 #endif  
   
   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
   is no UCP support. */    is no UCP support. */
Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8, Line 310  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
       RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };       RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
   
 /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
 versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
Line 1173  for (;;) Line 1171  for (;;)
           ecode = md->start_code + code_offset;            ecode = md->start_code + code_offset;
           save_capture_last = md->capture_last;            save_capture_last = md->capture_last;
           matched_once = TRUE;            matched_once = TRUE;
             mstart = md->start_match_ptr;    /* In case \K changed it */
           continue;            continue;
           }            }
   
Line 1245  for (;;) Line 1244  for (;;)
         eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
         ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
         matched_once = TRUE;          matched_once = TRUE;
           mstart = md->start_match_ptr;   /* In case \K reset it */
         continue;          continue;
         }          }
   
Line 1274  for (;;) Line 1274  for (;;)
   
     /* Control never reaches here. */      /* Control never reaches here. */
   
    /* Conditional group: compilation checked that there are no more than    /* Conditional group: compilation checked that there are no more than two
    two branches. If the condition is false, skipping the first branch takes us    branches. If the condition is false, skipping the first branch takes us
    past the end if there is only one branch, but that's OK because that is    past the end of the item if there is only one branch, but that's exactly
    exactly what going to the ket would do. */    what we want. */
   
     case OP_COND:      case OP_COND:
     case OP_SCOND:      case OP_SCOND:
     codelink = GET(ecode, 1);  
   
       /* The variable codelink will be added to ecode when the condition is
       false, to get to the second branch. Setting it to the offset to the ALT
       or KET, then incrementing ecode achieves this effect. We now have ecode
       pointing to the condition or callout. */
   
       codelink = GET(ecode, 1);   /* Offset to the second branch */
       ecode += 1 + LINK_SIZE;     /* From this opcode */
   
     /* Because of the way auto-callout works during compile, a callout item is      /* Because of the way auto-callout works during compile, a callout item is
     inserted between OP_COND and an assertion condition. */      inserted between OP_COND and an assertion condition. */
   
    if (ecode[LINK_SIZE+1] == OP_CALLOUT)    if (*ecode == OP_CALLOUT)
       {        {
       if (PUBL(callout) != NULL)        if (PUBL(callout) != NULL)
         {          {
         PUBL(callout_block) cb;          PUBL(callout_block) cb;
         cb.version          = 2;   /* Version 2 of the callout block */          cb.version          = 2;   /* Version 2 of the callout block */
        cb.callout_number   = ecode[LINK_SIZE+2];        cb.callout_number   = ecode[1];
         cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
 #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
         cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
Line 1304  for (;;) Line 1311  for (;;)
         cb.subject_length   = (int)(md->end_subject - md->start_subject);          cb.subject_length   = (int)(md->end_subject - md->start_subject);
         cb.start_match      = (int)(mstart - md->start_subject);          cb.start_match      = (int)(mstart - md->start_subject);
         cb.current_position = (int)(eptr - md->start_subject);          cb.current_position = (int)(eptr - md->start_subject);
        cb.pattern_position = GET(ecode, LINK_SIZE + 3);        cb.pattern_position = GET(ecode, 2);
        cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
         cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
         cb.capture_last     = md->capture_last & CAPLMASK;          cb.capture_last     = md->capture_last & CAPLMASK;
         /* Internal change requires this for API compatibility. */          /* Internal change requires this for API compatibility. */
Line 1315  for (;;) Line 1322  for (;;)
         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
         if (rrc < 0) RRETURN(rrc);          if (rrc < 0) RRETURN(rrc);
         }          }
   
         /* Advance ecode past the callout, so it now points to the condition. We
         must adjust codelink so that the value of ecode+codelink is unchanged. */
   
       ecode += PRIV(OP_lengths)[OP_CALLOUT];        ecode += PRIV(OP_lengths)[OP_CALLOUT];
       codelink -= PRIV(OP_lengths)[OP_CALLOUT];        codelink -= PRIV(OP_lengths)[OP_CALLOUT];
       }        }
   
    condcode = ecode[LINK_SIZE+1];    /* Test the various possible conditions */
   
    /* Now see what the actual condition is */    condition = FALSE;
    switch(condcode = *ecode)
    if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */ 
       {        {
      if (md->recursive == NULL)                /* Not recursing => FALSE */      case OP_RREF:         /* Numbered group recursion test */
       if (md->recursive != NULL)     /* Not recursing => FALSE */
         {          {
        condition = FALSE;        unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
        ecode += GET(ecode, 1); 
        } 
      else 
        { 
        unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/ 
         condition = (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
           }
         break;
   
        /* If the test is for recursion into a specific subpattern, and it is      case OP_DNRREF:       /* Duplicate named group recursion test */
        false, but the test was set up by name, scan the table to see if the      if (md->recursive != NULL)
        name refers to any other numbers, and test them. The condition is true        {
        if any one is set. */        int count = GET2(ecode, 1 + IMM2_SIZE);
        pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
        if (!condition && condcode == OP_NRREF)        while (count-- > 0)
           {            {
          pcre_uchar *slotA = md->name_table;          unsigned int recno = GET2(slot, 0);
          for (i = 0; i < md->name_count; i++)          condition = recno == md->recursive->group_num;
            {          if (condition) break;
            if (GET2(slotA, 0) == recno) break;          slot += md->name_entry_size;
            slotA += md->name_entry_size; 
            } 
 
          /* Found a name for the number - there can be only one; duplicate 
          names for different numbers are allowed, but not vice versa. First 
          scan down for duplicates. */ 
 
          if (i < md->name_count) 
            { 
            pcre_uchar *slotB = slotA; 
            while (slotB > md->name_table) 
              { 
              slotB -= md->name_entry_size; 
              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) 
                { 
                condition = GET2(slotB, 0) == md->recursive->group_num; 
                if (condition) break; 
                } 
              else break; 
              } 
 
            /* Scan up for duplicates */ 
 
            if (!condition) 
              { 
              slotB = slotA; 
              for (i++; i < md->name_count; i++) 
                { 
                slotB += md->name_entry_size; 
                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) 
                  { 
                  condition = GET2(slotB, 0) == md->recursive->group_num; 
                  if (condition) break; 
                  } 
                else break; 
                } 
              } 
            } 
           }            }
   
         /* Chose branch according to the condition */  
   
         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);  
         }          }
      }      break;
   
    else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */      case OP_CREF:         /* Numbered group used test */
      {      offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
      offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */ 
       condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
         break;
   
      /* If the numbered capture is unset, but the reference was by name,      case OP_DNCREF:      /* Duplicate named group used test */
      scan the table to see if the name refers to any other numbers, and test 
      them. The condition is true if any one is set. This is tediously similar 
      to the code above, but not close enough to try to amalgamate. */ 
 
      if (!condition && condcode == OP_NCREF) 
         {          {
        unsigned int refno = offset >> 1;        int count = GET2(ecode, 1 + IMM2_SIZE);
        pcre_uchar *slotA = md->name_table;        pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
        while (count-- > 0)
        for (i = 0; i < md->name_count; i++) 
           {            {
          if (GET2(slotA, 0) == refno) break;          offset = GET2(slot, 0) << 1;
          slotA += md->name_entry_size;          condition = offset < offset_top && md->offset_vector[offset] >= 0;
           if (condition) break;
           slot += md->name_entry_size;
           }            }
   
         /* Found a name for the number - there can be only one; duplicate names  
         for different numbers are allowed, but not vice versa. First scan down  
         for duplicates. */  
   
         if (i < md->name_count)  
           {  
           pcre_uchar *slotB = slotA;  
           while (slotB > md->name_table)  
             {  
             slotB -= md->name_entry_size;  
             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
               {  
               offset = GET2(slotB, 0) << 1;  
               condition = offset < offset_top &&  
                 md->offset_vector[offset] >= 0;  
               if (condition) break;  
               }  
             else break;  
             }  
   
           /* Scan up for duplicates */  
   
           if (!condition)  
             {  
             slotB = slotA;  
             for (i++; i < md->name_count; i++)  
               {  
               slotB += md->name_entry_size;  
               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
                 {  
                 offset = GET2(slotB, 0) << 1;  
                 condition = offset < offset_top &&  
                   md->offset_vector[offset] >= 0;  
                 if (condition) break;  
                 }  
               else break;  
               }  
             }  
           }  
         }          }
         break;
   
      /* Chose branch according to the condition */      case OP_DEF:     /* DEFINE - always false */
       break;
   
      ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);      /* The condition is an assertion. Call match() to evaluate it - setting
      }      md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
       of an assertion. */
   
    else if (condcode == OP_DEF)     /* DEFINE - always false */      default:
      { 
      condition = FALSE; 
      ecode += GET(ecode, 1); 
      } 
 
    /* The condition is an assertion. Call match() to evaluate it - setting 
    md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of 
    an assertion. */ 
 
    else 
      { 
       md->match_function_type = MATCH_CONDASSERT;        md->match_function_type = MATCH_CONDASSERT;
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);      RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
       if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
         {          {
         if (md->end_offset_top > offset_top)          if (md->end_offset_top > offset_top)
           offset_top = md->end_offset_top;  /* Captures may have happened */            offset_top = md->end_offset_top;  /* Captures may have happened */
         condition = TRUE;          condition = TRUE;
        ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
         /* Advance ecode past the assertion to the start of the first branch,
         but adjust it so that the general choosing code below works. */
 
         ecode += GET(ecode, 1);
         while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
           ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
         }          }
   
       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an        /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
      assertion; it is therefore treated as NOMATCH. */      assertion; it is therefore treated as NOMATCH. Any other return is an
       error. */
   
       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
         {          {
         RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
         }          }
      else      break;
        { 
        condition = FALSE; 
        ecode += codelink; 
        } 
       }        }
   
    /* We are now at the branch that is to be obeyed. As there is only one, can    /* Choose branch according to the condition */
    use tail recursion to avoid using another stack frame, except when there is 
    unlimited repeat of a possibly empty group. In the latter case, a recursive 
    call to match() is always required, unless the second alternative doesn't 
    exist, in which case we can just plough on. Note that, for compatibility 
    with Perl, the | in a conditional group is NOT treated as creating two 
    alternatives. If a THEN is encountered in the branch, it propagates out to 
    the enclosing alternative (unless nested in a deeper set of alternatives, 
    of course). */ 
   
    if (condition || *ecode == OP_ALT)    ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
 
     /* We are now at the branch that is to be obeyed. As there is only one, we
     can use tail recursion to avoid using another stack frame, except when
     there is unlimited repeat of a possibly empty group. In the latter case, a
     recursive call to match() is always required, unless the second alternative
     doesn't exist, in which case we can just plough on. Note that, for
     compatibility with Perl, the | in a conditional group is NOT treated as
     creating two alternatives. If a THEN is encountered in the branch, it
     propagates out to the enclosing alternative (unless nested in a deeper set
     of alternatives, of course). */
 
     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
       {        {
       if (op != OP_SCOND)        if (op != OP_SCOND)
         {          {
         ecode += 1 + LINK_SIZE;  
         goto TAIL_RECURSE;          goto TAIL_RECURSE;
         }          }
   
       md->match_function_type = MATCH_CBEGROUP;        md->match_function_type = MATCH_CBEGROUP;
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);      RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
       RRETURN(rrc);        RRETURN(rrc);
       }        }
   
Line 1523  for (;;) Line 1442  for (;;)
   
     else      else
       {        {
       ecode += 1 + LINK_SIZE;  
       }        }
     break;      break;
   
Line 2089  for (;;) Line 2007  for (;;)
   
     if (*ecode == OP_KETRPOS)      if (*ecode == OP_KETRPOS)
       {        {
         md->start_match_ptr = mstart;    /* In case \K reset it */
       md->end_match_ptr = eptr;        md->end_match_ptr = eptr;
       md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
       RRETURN(MATCH_KETRPOS);        RRETURN(MATCH_KETRPOS);
Line 2656  for (;;) Line 2575  for (;;)
           RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
         break;          break;
   
        case PT_SPACE:    /* Perl space */        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||        which means that Perl space and POSIX space are now identical. PCRE
             c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)        was changed at release 8.34. */
               == (op == OP_NOTPROP)) 
          RRETURN(MATCH_NOMATCH); 
        break; 
   
           case PT_SPACE:    /* Perl space */
         case PT_PXSPACE:  /* POSIX space */          case PT_PXSPACE:  /* POSIX space */
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||        switch(c)
             c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||          {
             c == CHAR_FF || c == CHAR_CR)          HSPACE_CASES:
               == (op == OP_NOTPROP))          VSPACE_CASES:
          RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
           break;
 
           default:
           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
           break;
           }
         break;          break;
   
         case PT_WORD:          case PT_WORD:
Line 2742  for (;;) Line 2666  for (;;)
     similar code to character type repeats - written out again for speed.      similar code to character type repeats - written out again for speed.
     However, if the referenced string is the empty string, always treat      However, if the referenced string is the empty string, always treat
     it as matched, any number of times (otherwise there could be infinite      it as matched, any number of times (otherwise there could be infinite
    loops). */    loops). If the reference is unset, there are two possibilities:
   
     case OP_REF:  
     case OP_REFI:  
     caseless = op == OP_REFI;  
     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */  
     ecode += 1 + IMM2_SIZE;  
   
     /* If the reference is unset, there are two possibilities:  
   
     (a) In the default, Perl-compatible state, set the length negative;      (a) In the default, Perl-compatible state, set the length negative;
     this ensures that every attempt at a match fails. We can't just fail      this ensures that every attempt at a match fails. We can't just fail
     here, because of the possibility of quantifiers with zero minima.      here, because of the possibility of quantifiers with zero minima.
Line 2760  for (;;) Line 2676  for (;;)
     so that the back reference matches an empty string.      so that the back reference matches an empty string.
   
     Otherwise, set the length to the length of what was matched by the      Otherwise, set the length to the length of what was matched by the
    referenced subpattern. */    referenced subpattern.
   
       The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
       or to a non-duplicated named group. For a duplicated named group, OP_DNREF
       and OP_DNREFI are used. In this case we must scan the list of groups to
       which the name refers, and use the first one that is set. */
   
       case OP_DNREF:
       case OP_DNREFI:
       caseless = op == OP_DNREFI;
         {
         int count = GET2(ecode, 1+IMM2_SIZE);
         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
         ecode += 1 + 2*IMM2_SIZE;
   
         while (count-- > 0)
           {
           offset = GET2(slot, 0) << 1;
           if (offset < offset_top && md->offset_vector[offset] >= 0) break;
           slot += md->name_entry_size;
           }
         if (count < 0)
           length = (md->jscript_compat)? 0 : -1;
         else
           length = md->offset_vector[offset+1] - md->offset_vector[offset];
         }
       goto REF_REPEAT;
   
       case OP_REF:
       case OP_REFI:
       caseless = op == OP_REFI;
       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
       ecode += 1 + IMM2_SIZE;
     if (offset >= offset_top || md->offset_vector[offset] < 0)      if (offset >= offset_top || md->offset_vector[offset] < 0)
       length = (md->jscript_compat)? 0 : -1;        length = (md->jscript_compat)? 0 : -1;
     else      else
Line 2769  for (;;) Line 2716  for (;;)
   
     /* Set up for repetition, or handle the non-repeated case */      /* Set up for repetition, or handle the non-repeated case */
   
       REF_REPEAT:
     switch (*ecode)      switch (*ecode)
       {        {
       case OP_CRSTAR:        case OP_CRSTAR:
Line 2917  for (;;) Line 2865  for (;;)
         case OP_CRMINPLUS:          case OP_CRMINPLUS:
         case OP_CRQUERY:          case OP_CRQUERY:
         case OP_CRMINQUERY:          case OP_CRMINQUERY:
           case OP_CRPOSSTAR:
           case OP_CRPOSPLUS:
           case OP_CRPOSQUERY:
         c = *ecode++ - OP_CRSTAR;          c = *ecode++ - OP_CRSTAR;
        minimize = (c & 1) != 0;        if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
         else possessive = TRUE;
         min = rep_min[c];                 /* Pick up values from tables; */          min = rep_min[c];                 /* Pick up values from tables; */
         max = rep_max[c];                 /* zero for max => infinity */          max = rep_max[c];                 /* zero for max => infinity */
         if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
Line 2926  for (;;) Line 2878  for (;;)
   
         case OP_CRRANGE:          case OP_CRRANGE:
         case OP_CRMINRANGE:          case OP_CRMINRANGE:
           case OP_CRPOSRANGE:
         minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
           possessive = (*ecode == OP_CRPOSRANGE);
         min = GET2(ecode, 1);          min = GET2(ecode, 1);
         max = GET2(ecode, 1 + IMM2_SIZE);          max = GET2(ecode, 1 + IMM2_SIZE);
         if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
Line 3068  for (;;) Line 3022  for (;;)
               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
             eptr += len;              eptr += len;
             }              }
   
             if (possessive) continue;    /* No backtracking */
   
           for (;;)            for (;;)
             {              {
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
Line 3098  for (;;) Line 3055  for (;;)
               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
             eptr++;              eptr++;
             }              }
   
             if (possessive) continue;    /* No backtracking */
   
           while (eptr >= pp)            while (eptr >= pp)
             {              {
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
Line 3113  for (;;) Line 3073  for (;;)
     /* Control never gets here */      /* Control never gets here */
   
   
    /* Match an extended character class. This opcode is encountered only    /* Match an extended character class. In the 8-bit library, this opcode is
    when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8    encountered only when UTF-8 mode is supported. In the 16-bit and
    mode, because Unicode properties are supported in non-UTF-8 mode. */    32-bit libraries, codepoints greater than 255 may be encountered even when
     UTF is not supported. */
   
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:      case OP_XCLASS:
Line 3131  for (;;) Line 3092  for (;;)
         case OP_CRMINPLUS:          case OP_CRMINPLUS:
         case OP_CRQUERY:          case OP_CRQUERY:
         case OP_CRMINQUERY:          case OP_CRMINQUERY:
           case OP_CRPOSSTAR:
           case OP_CRPOSPLUS:
           case OP_CRPOSQUERY:
         c = *ecode++ - OP_CRSTAR;          c = *ecode++ - OP_CRSTAR;
        minimize = (c & 1) != 0;        if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
         else possessive = TRUE;
         min = rep_min[c];                 /* Pick up values from tables; */          min = rep_min[c];                 /* Pick up values from tables; */
         max = rep_max[c];                 /* zero for max => infinity */          max = rep_max[c];                 /* zero for max => infinity */
         if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
Line 3140  for (;;) Line 3105  for (;;)
   
         case OP_CRRANGE:          case OP_CRRANGE:
         case OP_CRMINRANGE:          case OP_CRMINRANGE:
           case OP_CRPOSRANGE:
         minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
           possessive = (*ecode == OP_CRPOSRANGE);
         min = GET2(ecode, 1);          min = GET2(ecode, 1);
         max = GET2(ecode, 1 + IMM2_SIZE);          max = GET2(ecode, 1 + IMM2_SIZE);
         if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
Line 3212  for (;;) Line 3179  for (;;)
           if (!PRIV(xclass)(c, data, utf)) break;            if (!PRIV(xclass)(c, data, utf)) break;
           eptr += len;            eptr += len;
           }            }
   
           if (possessive) continue;    /* No backtracking */
   
         for(;;)          for(;;)
           {            {
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
Line 3590  for (;;) Line 3560  for (;;)
           if (fc != cc && foc != cc) break;            if (fc != cc && foc != cc) break;
           eptr++;            eptr++;
           }            }
   
         if (possessive) continue;       /* No backtracking */          if (possessive) continue;       /* No backtracking */
         for (;;)          for (;;)
           {            {
Line 3599  for (;;) Line 3568  for (;;)
           eptr--;            eptr--;
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           }            }
        RRETURN(MATCH_NOMATCH);        /* Control never gets here */
         }          }
       /* Control never gets here */  
       }        }
   
     /* Caseful comparisons (includes all multi-byte characters) */      /* Caseful comparisons (includes all multi-byte characters) */
Line 3657  for (;;) Line 3625  for (;;)
           eptr--;            eptr--;
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           }            }
        RRETURN(MATCH_NOMATCH);        /* Control never gets here */
         }          }
       }        }
     /* Control never gets here */      /* Control never gets here */
Line 3942  for (;;) Line 3910  for (;;)
             eptr--;              eptr--;
             }              }
           }            }
        /* Control never gets here */
        RRETURN(MATCH_NOMATCH); 
         }          }
       /* Control never gets here */  
       }        }
   
     /* Caseful comparisons */      /* Caseful comparisons */
Line 4079  for (;;) Line 4045  for (;;)
             eptr--;              eptr--;
             }              }
           }            }
        /* Control never gets here */
        RRETURN(MATCH_NOMATCH); 
         }          }
       }        }
     /* Control never gets here */      /* Control never gets here */
Line 4262  for (;;) Line 4227  for (;;)
             }              }
           break;            break;
   
             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
             which means that Perl space and POSIX space are now identical. PCRE
             was changed at release 8.34. */
   
           case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
             case PT_PXSPACE:  /* POSIX space */
           for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
             {              {
             if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
Line 4271  for (;;) Line 4241  for (;;)
               RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
               }                }
             GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||            switch(c)
                 c == CHAR_FF || c == CHAR_CR) 
                   == prop_fail_result) 
              RRETURN(MATCH_NOMATCH); 
            } 
          break; 
 
          case PT_PXSPACE:  /* POSIX space */ 
          for (i = 1; i <= min; i++) 
            { 
            if (eptr >= md->end_subject) 
               {                {
              SCHECK_PARTIAL();              HSPACE_CASES:
              RRETURN(MATCH_NOMATCH);              VSPACE_CASES:
               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
               break;
 
               default:
               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
                 RRETURN(MATCH_NOMATCH);
               break;
               }                }
             GETCHARINCTEST(c, eptr);  
             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)  
                    == prop_fail_result)  
               RRETURN(MATCH_NOMATCH);  
             }              }
           break;            break;
   
Line 5010  for (;;) Line 4972  for (;;)
             }              }
           /* Control never gets here */            /* Control never gets here */
   
          case PT_SPACE:    /* Perl space */          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
          for (fi = min;; fi++)          which means that Perl space and POSIX space are now identical. PCRE
            {          was changed at release 8.34. */
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM60); 
            if (rrc != MATCH_NOMATCH) RRETURN(rrc); 
            if (fi >= max) RRETURN(MATCH_NOMATCH); 
            if (eptr >= md->end_subject) 
              { 
              SCHECK_PARTIAL(); 
              RRETURN(MATCH_NOMATCH); 
              } 
            GETCHARINCTEST(c, eptr); 
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || 
                 c == CHAR_FF || c == CHAR_CR) 
                   == prop_fail_result) 
              RRETURN(MATCH_NOMATCH); 
            } 
          /* Control never gets here */ 
   
             case PT_SPACE:    /* Perl space */
           case PT_PXSPACE:  /* POSIX space */            case PT_PXSPACE:  /* POSIX space */
           for (fi = min;; fi++)            for (fi = min;; fi++)
             {              {
Line 5041  for (;;) Line 4989  for (;;)
               RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
               }                }
             GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||            switch(c)
                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)              {
                   == prop_fail_result)              HSPACE_CASES:
              RRETURN(MATCH_NOMATCH);              VSPACE_CASES:
               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
               break;
 
               default:
               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
                 RRETURN(MATCH_NOMATCH);
               break;
               }
             }              }
           /* Control never gets here */            /* Control never gets here */
   
Line 5097  for (;;) Line 5053  for (;;)
           case PT_UCNC:            case PT_UCNC:
           for (fi = min;; fi++)            for (fi = min;; fi++)
             {              {
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
             if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
Line 5528  for (;;) Line 5484  for (;;)
             }              }
           break;            break;
   
             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
             which means that Perl space and POSIX space are now identical. PCRE
             was changed at release 8.34. */
   
           case PT_SPACE:    /* Perl space */            case PT_SPACE:    /* Perl space */
             case PT_PXSPACE:  /* POSIX space */
           for (i = min; i < max; i++)            for (i = min; i < max; i++)
             {              {
             int len = 1;              int len = 1;
Line 5538  for (;;) Line 5499  for (;;)
               break;                break;
               }                }
             GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||            switch(c)
                 c == CHAR_FF || c == CHAR_CR)              {
                 == prop_fail_result)              HSPACE_CASES:
               VSPACE_CASES:
               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
               break;                break;
             eptr+= len;  
             }  
           break;  
   
          case PT_PXSPACE:  /* POSIX space */               break;
          for (i = min; i < max; i++)              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
            {                goto ENDLOOP99;   /* Break the loop */
            int len = 1; 
            if (eptr >= md->end_subject) 
              { 
              SCHECK_PARTIAL(); 
               break;                break;
               }                }
             GETCHARLENTEST(c, eptr, len);  
             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||  
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)  
                  == prop_fail_result)  
               break;  
             eptr+= len;              eptr+= len;
             }              }
             ENDLOOP99:
           break;            break;
   
           case PT_WORD:            case PT_WORD:
Line 5642  for (;;) Line 5594  for (;;)
           }            }
         }          }
   
      /* Match extended Unicode sequences. We will get here only if the      /* Match extended Unicode grapheme clusters. We will get here only if the
       support is in the binary; otherwise a compile-time error occurs. */        support is in the binary; otherwise a compile-time error occurs. */
   
       else if (ctype == OP_EXTUNI)        else if (ctype == OP_EXTUNI)
Line 5675  for (;;) Line 5627  for (;;)
         /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
   
         if (possessive) continue;    /* No backtracking */          if (possessive) continue;    /* No backtracking */
   
         for(;;)          for(;;)
           {            {
          if (eptr == pp) goto TAIL_RECURSE;          int lgb, rgb;
           PCRE_PUCHAR fptr;
 
           if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   
             /* Backtracking over an extended grapheme cluster involves inspecting
             the previous two characters (if present) to see if a break is
             permitted between them. */
   
           eptr--;            eptr--;
          for (;;)                        /* Move back over one extended */          if (!utf) c = *eptr; else
             {              {
            if (!utf) c = *eptr; else            BACKCHAR(eptr);
             GETCHAR(c, eptr);
             }
           rgb = UCD_GRAPHBREAK(c);
 
           for (;;)
             {
             if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
             fptr = eptr - 1;
             if (!utf) c = *fptr; else
               {                {
              BACKCHAR(eptr);              BACKCHAR(fptr);
              GETCHAR(c, eptr);              GETCHAR(c, fptr);
               }                }
            if (UCD_CATEGORY(c) != ucp_M) break;            lgb = UCD_GRAPHBREAK(c);
            eptr--;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
             eptr = fptr;
             rgb = lgb;
             }              }
           }            }
         }          }
Line 6211  for (;;) Line 6183  for (;;)
           }            }
         }          }
   
      /* Get here if we can't make it match with any permitted repetitions */      /* Control never gets here */
 
      RRETURN(MATCH_NOMATCH); 
       }        }
     /* Control never gets here */  
   
     /* There's been some horrible disaster. Arrival here can only mean there is      /* There's been some horrible disaster. Arrival here can only mean there is
     something seriously wrong in the code above or the OP_xxx definitions. */      something seriously wrong in the code above or the OP_xxx definitions. */
Line 6249  switch (frame->Xwhere) Line 6218  switch (frame->Xwhere)
   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
   LBL(65) LBL(66)    LBL(65) LBL(66)
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  LBL(21)  LBL(20) LBL(21)
 #endif  #endif
 #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
  LBL(16) LBL(18) LBL(20)  LBL(16) LBL(18)
   LBL(22) LBL(23) LBL(28) LBL(30)    LBL(22) LBL(23) LBL(28) LBL(30)
   LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
 #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
 #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
 #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
   default:    default:
Line 6410  const pcre_uint8 *start_bits = NULL; Line 6379  const pcre_uint8 *start_bits = NULL;
 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
 PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
 PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
PCRE_PUCHAR match_partial;PCRE_PUCHAR match_partial = NULL;
 PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
   
 const pcre_study_data *study;  const pcre_study_data *study;
Line 7178  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) Line 7147  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
   
 /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
   
if (start_partial != NULL)if (match_partial != NULL)
   {    {
   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
   md->mark = NULL;    md->mark = NULL;

Removed from v.1.1.1.4  
changed lines
  Added in v.1.1.1.5


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>