Diff for /embedaddon/pcre/pcre_study.c between versions 1.1.1.1 and 1.1.1.5

version 1.1.1.1, 2012/02/21 23:05:51 version 1.1.1.5, 2014/06/15 19:46:03
Line 6 Line 6
 and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
   
                        Written by Philip Hazel                         Written by Philip Hazel
           Copyright (c) 1997-2010 University of Cambridge           Copyright (c) 1997-2012 University of Cambridge
   
 -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
Line 66  string of that length that matches. In UTF8 mode, the  Line 66  string of that length that matches. In UTF8 mode, the 
 rather than bytes.  rather than bytes.
   
 Arguments:  Arguments:
     re              compiled pattern block
   code            pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
  startcode       pointer to start of the whole pattern  startcode       pointer to start of the whole pattern's code
   options         the compiling options    options         the compiling options
   int             RECURSE depth    int             RECURSE depth
   
Line 78  Returns:   the minimum length Line 79  Returns:   the minimum length
 */  */
   
 static int  static int
find_minlength(const uschar *code, const uschar *startcode, int options,find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
  int recurse_depth)  const pcre_uchar *startcode, int options, int recurse_depth)
 {  {
 int length = -1;  int length = -1;
BOOL utf8 = (options & PCRE_UTF8) != 0;/* PCRE_UTF16 has the same value as PCRE_UTF8. */
 BOOL utf = (options & PCRE_UTF8) != 0;
 BOOL had_recurse = FALSE;  BOOL had_recurse = FALSE;
 register int branchlength = 0;  register int branchlength = 0;
register uschar *cc = (uschar *)code + 1 + LINK_SIZE;register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
   
 if (*code == OP_CBRA || *code == OP_SCBRA ||  if (*code == OP_CBRA || *code == OP_SCBRA ||
    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2;    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
   
 /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
 branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
Line 96  branch, check the length against that of the other bra Line 98  branch, check the length against that of the other bra
 for (;;)  for (;;)
   {    {
   int d, min;    int d, min;
  uschar *cs, *ce;  pcre_uchar *cs, *ce;
  register int op = *cc;  register pcre_uchar op = *cc;
   
   switch (op)    switch (op)
     {      {
Line 128  for (;;) Line 130  for (;;)
     case OP_SBRAPOS:      case OP_SBRAPOS:
     case OP_ONCE:      case OP_ONCE:
     case OP_ONCE_NC:      case OP_ONCE_NC:
    d = find_minlength(cc, startcode, options, recurse_depth);    d = find_minlength(re, cc, startcode, options, recurse_depth);
     if (d < 0) return d;      if (d < 0) return d;
     branchlength += d;      branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
Line 174  for (;;) Line 176  for (;;)
   
     case OP_REVERSE:      case OP_REVERSE:
     case OP_CREF:      case OP_CREF:
    case OP_NCREF:    case OP_DNCREF:
     case OP_RREF:      case OP_RREF:
    case OP_NRREF:    case OP_DNRREF:
     case OP_DEF:      case OP_DEF:
     case OP_CALLOUT:      case OP_CALLOUT:
     case OP_SOD:      case OP_SOD:
Line 189  for (;;) Line 191  for (;;)
     case OP_DOLLM:      case OP_DOLLM:
     case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
     case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
    cc += _pcre_OP_lengths[*cc];    cc += PRIV(OP_lengths)[*cc];
     break;      break;
   
     /* Skip over a subpattern that has a {0} or {0,x} quantifier */      /* Skip over a subpattern that has a {0} or {0,x} quantifier */
Line 198  for (;;) Line 200  for (;;)
     case OP_BRAMINZERO:      case OP_BRAMINZERO:
     case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
     case OP_SKIPZERO:      case OP_SKIPZERO:
    cc += _pcre_OP_lengths[*cc];    cc += PRIV(OP_lengths)[*cc];
     do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
     cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
     break;      break;
Line 223  for (;;) Line 225  for (;;)
     case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
     branchlength++;      branchlength++;
     cc += 2;      cc += 2;
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif  #endif
     break;      break;
   
Line 243  for (;;) Line 245  for (;;)
     case OP_NOTEXACT:      case OP_NOTEXACT:
     case OP_NOTEXACTI:      case OP_NOTEXACTI:
     branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
    cc += 4;    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif  #endif
     break;      break;
   
     case OP_TYPEEXACT:      case OP_TYPEEXACT:
     branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
    cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;    cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
       || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
     break;      break;
   
     /* Handle single-char non-literal matchers */      /* Handle single-char non-literal matchers */
Line 291  for (;;) Line 294  for (;;)
     appear, but leave the code, just in case.) */      appear, but leave the code, just in case.) */
   
     case OP_ANYBYTE:      case OP_ANYBYTE:
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8) return -1;    if (utf) return -1;
 #endif  #endif
     branchlength++;      branchlength++;
     cc++;      cc++;
Line 308  for (;;) Line 311  for (;;)
     case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
     case OP_TYPEPOSQUERY:      case OP_TYPEPOSQUERY:
     if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;      if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
    cc += _pcre_OP_lengths[op];    cc += PRIV(OP_lengths)[op];
     break;      break;
   
     case OP_TYPEUPTO:      case OP_TYPEUPTO:
     case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
     case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;    if (cc[1 + IMM2_SIZE] == OP_PROP
    cc += _pcre_OP_lengths[op];      || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
     cc += PRIV(OP_lengths)[op];
     break;      break;
   
     /* Check a class for variable quantification */      /* Check a class for variable quantification */
   
#ifdef SUPPORT_UTF8    case OP_CLASS:
     case OP_NCLASS:
 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
     case OP_XCLASS:      case OP_XCLASS:
    cc += GET(cc, 1) - 33;    /* The original code caused an unsigned overflow in 64 bit systems,
    /* Fall through */    so now we use a conditional statement. */
     if (op == OP_XCLASS)
       cc += GET(cc, 1);
     else
       cc += PRIV(OP_lengths)[OP_CLASS];
 #else
     cc += PRIV(OP_lengths)[OP_CLASS];
 #endif  #endif
   
     case OP_CLASS:  
     case OP_NCLASS:  
     cc += 33;  
   
     switch (*cc)      switch (*cc)
       {        {
       case OP_CRPLUS:        case OP_CRPLUS:
       case OP_CRMINPLUS:        case OP_CRMINPLUS:
         case OP_CRPOSPLUS:
       branchlength++;        branchlength++;
       /* Fall through */        /* Fall through */
   
Line 341  for (;;) Line 350  for (;;)
       case OP_CRMINSTAR:        case OP_CRMINSTAR:
       case OP_CRQUERY:        case OP_CRQUERY:
       case OP_CRMINQUERY:        case OP_CRMINQUERY:
         case OP_CRPOSSTAR:
         case OP_CRPOSQUERY:
       cc++;        cc++;
       break;        break;
   
       case OP_CRRANGE:        case OP_CRRANGE:
       case OP_CRMINRANGE:        case OP_CRMINRANGE:
         case OP_CRPOSRANGE:
       branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
      cc += 5;      cc += 1 + 2 * IMM2_SIZE;
       break;        break;
   
       default:        default:
Line 368  for (;;) Line 380  for (;;)
     matches an empty string (by default it causes a matching failure), so in      matches an empty string (by default it causes a matching failure), so in
     that case we must set the minimum length to zero. */      that case we must set the minimum length to zero. */
   
    case OP_REF:    case OP_DNREF:     /* Duplicate named pattern back reference */
     case OP_DNREFI:
     if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
       {
       int count = GET2(cc, 1+IMM2_SIZE);
       pcre_uchar *slot = (pcre_uchar *)re +
         re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
       d = INT_MAX;
       while (count-- > 0)
         {
         ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
         if (cs == NULL) return -2;
         do ce += GET(ce, 1); while (*ce == OP_ALT);
         if (cc > cs && cc < ce)
           {
           d = 0;
           had_recurse = TRUE;
           break;
           }
         else
           {
           int dd = find_minlength(re, cs, startcode, options, recurse_depth);
           if (dd < d) d = dd;
           }
         slot += re->name_entry_size;
         }
       }
     else d = 0;
     cc += 1 + 2*IMM2_SIZE;
     goto REPEAT_BACK_REFERENCE;
 
     case OP_REF:      /* Single back reference */
     case OP_REFI:      case OP_REFI:
     if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
       {        {
      ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));      ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
       if (cs == NULL) return -2;        if (cs == NULL) return -2;
       do ce += GET(ce, 1); while (*ce == OP_ALT);        do ce += GET(ce, 1); while (*ce == OP_ALT);
       if (cc > cs && cc < ce)        if (cc > cs && cc < ce)
Line 382  for (;;) Line 425  for (;;)
         }          }
       else        else
         {          {
        d = find_minlength(cs, startcode, options, recurse_depth);        d = find_minlength(re, cs, startcode, options, recurse_depth);
         }          }
       }        }
     else d = 0;      else d = 0;
    cc += 3;    cc += 1 + IMM2_SIZE;
   
     /* Handle repeated back references */      /* Handle repeated back references */
   
       REPEAT_BACK_REFERENCE:
     switch (*cc)      switch (*cc)
       {        {
       case OP_CRSTAR:        case OP_CRSTAR:
       case OP_CRMINSTAR:        case OP_CRMINSTAR:
       case OP_CRQUERY:        case OP_CRQUERY:
       case OP_CRMINQUERY:        case OP_CRMINQUERY:
         case OP_CRPOSSTAR:
         case OP_CRPOSQUERY:
       min = 0;        min = 0;
       cc++;        cc++;
       break;        break;
   
       case OP_CRPLUS:        case OP_CRPLUS:
       case OP_CRMINPLUS:        case OP_CRMINPLUS:
         case OP_CRPOSPLUS:
       min = 1;        min = 1;
       cc++;        cc++;
       break;        break;
   
       case OP_CRRANGE:        case OP_CRRANGE:
       case OP_CRMINRANGE:        case OP_CRMINRANGE:
         case OP_CRPOSRANGE:
       min = GET2(cc, 1);        min = GET2(cc, 1);
      cc += 5;      cc += 1 + 2 * IMM2_SIZE;
       break;        break;
   
       default:        default:
Line 424  for (;;) Line 472  for (;;)
     caught by a recursion depth count. */      caught by a recursion depth count. */
   
     case OP_RECURSE:      case OP_RECURSE:
    cs = ce = (uschar *)startcode + GET(cc, 1);    cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
     do ce += GET(ce, 1); while (*ce == OP_ALT);      do ce += GET(ce, 1); while (*ce == OP_ALT);
     if ((cc > cs && cc < ce) || recurse_depth > 10)      if ((cc > cs && cc < ce) || recurse_depth > 10)
       had_recurse = TRUE;        had_recurse = TRUE;
     else      else
       {        {
      branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);      branchlength += find_minlength(re, cs, startcode, options,
         recurse_depth + 1);
       }        }
     cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
     break;      break;
Line 482  for (;;) Line 531  for (;;)
     case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
     case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
   
    cc += _pcre_OP_lengths[op];    cc += PRIV(OP_lengths)[op];
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif  #endif
     break;      break;
   
Line 494  for (;;) Line 543  for (;;)
     case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
     case OP_SKIP_ARG:      case OP_SKIP_ARG:
     case OP_THEN_ARG:      case OP_THEN_ARG:
    cc += _pcre_OP_lengths[op] + cc[1];    cc += PRIV(OP_lengths)[op] + cc[1];
     break;      break;
   
     /* The remaining opcodes are just skipped over. */      /* The remaining opcodes are just skipped over. */
Line 506  for (;;) Line 555  for (;;)
     case OP_SET_SOM:      case OP_SET_SOM:
     case OP_SKIP:      case OP_SKIP:
     case OP_THEN:      case OP_THEN:
    cc += _pcre_OP_lengths[op];    cc += PRIV(OP_lengths)[op];
     break;      break;
   
     /* This should not occur: we list all opcodes explicitly so that when      /* This should not occur: we list all opcodes explicitly so that when
Line 535  Arguments: Line 584  Arguments:
   p             points to the character    p             points to the character
   caseless      the caseless flag    caseless      the caseless flag
   cd            the block with char table pointers    cd            the block with char table pointers
  utf8          TRUE for UTF-8 mode  utf           TRUE for UTF-8 / UTF-16 / UTF-32 mode
   
 Returns:        pointer after the character  Returns:        pointer after the character
 */  */
   
static const uschar *static const pcre_uchar *
set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
  compile_data *cd, BOOL utf8)  compile_data *cd, BOOL utf)
 {  {
unsigned int c = *p;pcre_uint32 c = *p;
   
   #ifdef COMPILE_PCRE8
 SET_BIT(c);  SET_BIT(c);
   
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
if (utf8 && c > 127)if (utf && c > 127)
   {    {
   GETCHARINC(c, p);    GETCHARINC(c, p);
 #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
   if (caseless)    if (caseless)
     {      {
    uschar buff[8];    pcre_uchar buff[6];
     c = UCD_OTHERCASE(c);      c = UCD_OTHERCASE(c);
    (void)_pcre_ord2utf8(c, buff);    (void)PRIV(ord2utf)(c, buff);
     SET_BIT(buff[0]);      SET_BIT(buff[0]);
     }      }
#endif#endif  /* Not SUPPORT_UCP */
   return p;    return p;
   }    }
#endif#else   /* Not SUPPORT_UTF */
 (void)(utf);   /* Stops warning for unused parameter */
 #endif  /* SUPPORT_UTF */
   
 /* Not UTF-8 mode, or character is less than 127. */  /* Not UTF-8 mode, or character is less than 127. */
   
 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
 return p + 1;  return p + 1;
   #endif  /* COMPILE_PCRE8 */
   
   #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   if (c > 0xff)
     {
     c = 0xff;
     caseless = FALSE;
     }
   SET_BIT(c);
   
   #ifdef SUPPORT_UTF
   if (utf && c > 127)
     {
     GETCHARINC(c, p);
   #ifdef SUPPORT_UCP
     if (caseless)
       {
       c = UCD_OTHERCASE(c);
       if (c > 0xff)
         c = 0xff;
       SET_BIT(c);
       }
   #endif  /* SUPPORT_UCP */
     return p;
     }
   #else   /* Not SUPPORT_UTF */
   (void)(utf);   /* Stops warning for unused parameter */
   #endif  /* SUPPORT_UTF */
   
   if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
   return p + 1;
   #endif
 }  }
   
   
Line 594  Returns:         nothing Line 678  Returns:         nothing
 */  */
   
 static void  static void
set_type_bits(uschar *start_bits, int cbit_type, int table_limit,set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
   compile_data *cd)    compile_data *cd)
 {  {
register int c;register pcre_uint32 c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
   #if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (table_limit == 32) return;  if (table_limit == 32) return;
 for (c = 128; c < 256; c++)  for (c = 128; c < 256; c++)
   {    {
   if ((cd->cbits[c/8] & (1 << (c&7))) != 0)    if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
     {      {
    uschar buff[8];    pcre_uchar buff[6];
    (void)_pcre_ord2utf8(c, buff);    (void)PRIV(ord2utf)(c, buff);
     SET_BIT(buff[0]);      SET_BIT(buff[0]);
     }      }
   }    }
   #endif
 }  }
   
   
Line 634  Returns:         nothing Line 720  Returns:         nothing
 */  */
   
 static void  static void
set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
   compile_data *cd)    compile_data *cd)
 {  {
register int c;register pcre_uint32 c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
   #if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
   #endif
 }  }
   
   
Line 659  function fails unless the result is SSB_DONE. Line 747  function fails unless the result is SSB_DONE.
 Arguments:  Arguments:
   code         points to an expression    code         points to an expression
   start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
  utf8         TRUE if in UTF-8 mode  utf          TRUE if in UTF-8 / UTF-16 / UTF-32 mode
   cd           the block with char table pointers    cd           the block with char table pointers
   
 Returns:       SSB_FAIL     => Failed to find any starting bytes  Returns:       SSB_FAIL     => Failed to find any starting bytes
Line 669  Returns:       SSB_FAIL     => Failed to find any star Line 757  Returns:       SSB_FAIL     => Failed to find any star
 */  */
   
 static int  static int
set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
   compile_data *cd)    compile_data *cd)
 {  {
register int c;register pcre_uint32 c;
 int yield = SSB_DONE;  int yield = SSB_DONE;
int table_limit = utf8? 16:32;#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 int table_limit = utf? 16:32;
 #else
 int table_limit = 32;
 #endif
   
 #if 0  #if 0
 /* ========================================================================= */  /* ========================================================================= */
Line 696  volatile int dummy; Line 788  volatile int dummy;
 do  do
   {    {
   BOOL try_next = TRUE;    BOOL try_next = TRUE;
  const uschar *tcode = code + 1 + LINK_SIZE;  const pcre_uchar *tcode = code + 1 + LINK_SIZE;
   
   if (*code == OP_CBRA || *code == OP_SCBRA ||    if (*code == OP_CBRA || *code == OP_SCBRA ||
      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
   
   while (try_next)    /* Loop for items in this branch */    while (try_next)    /* Loop for items in this branch */
     {      {
Line 728  do Line 820  do
       case OP_COND:        case OP_COND:
       case OP_CREF:        case OP_CREF:
       case OP_DEF:        case OP_DEF:
         case OP_DNCREF:
         case OP_DNREF:
         case OP_DNREFI:
         case OP_DNRREF:
       case OP_DOLL:        case OP_DOLL:
       case OP_DOLLM:        case OP_DOLLM:
       case OP_END:        case OP_END:
Line 736  do Line 832  do
       case OP_EXTUNI:        case OP_EXTUNI:
       case OP_FAIL:        case OP_FAIL:
       case OP_MARK:        case OP_MARK:
       case OP_NCREF:  
       case OP_NOT:        case OP_NOT:
       case OP_NOTEXACT:        case OP_NOTEXACT:
       case OP_NOTEXACTI:        case OP_NOTEXACTI:
Line 768  do Line 863  do
       case OP_NOTUPTOI:        case OP_NOTUPTOI:
       case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
       case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
       case OP_NRREF:  
       case OP_PROP:        case OP_PROP:
       case OP_PRUNE:        case OP_PRUNE:
       case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
Line 785  do Line 879  do
       case OP_SOM:        case OP_SOM:
       case OP_THEN:        case OP_THEN:
       case OP_THEN_ARG:        case OP_THEN_ARG:
   #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       case OP_XCLASS:        case OP_XCLASS:
   #endif
       return SSB_FAIL;        return SSB_FAIL;
   
       /* We can ignore word boundary tests. */        /* We can ignore word boundary tests. */
Line 811  do Line 907  do
       case OP_ONCE:        case OP_ONCE:
       case OP_ONCE_NC:        case OP_ONCE_NC:
       case OP_ASSERT:        case OP_ASSERT:
      rc = set_start_bits(tcode, start_bits, utf8, cd);      rc = set_start_bits(tcode, start_bits, utf, cd);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
       if (rc == SSB_DONE) try_next = FALSE; else        if (rc == SSB_DONE) try_next = FALSE; else
         {          {
Line 858  do Line 954  do
       case OP_BRAZERO:        case OP_BRAZERO:
       case OP_BRAMINZERO:        case OP_BRAMINZERO:
       case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
      rc = set_start_bits(++tcode, start_bits, utf8, cd);      rc = set_start_bits(++tcode, start_bits, utf, cd);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
 /* =========================================================================  /* =========================================================================
       See the comment at the head of this function concerning the next line,        See the comment at the head of this function concerning the next line,
Line 885  do Line 981  do
       case OP_QUERY:        case OP_QUERY:
       case OP_MINQUERY:        case OP_MINQUERY:
       case OP_POSQUERY:        case OP_POSQUERY:
      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
       break;        break;
   
       case OP_STARI:        case OP_STARI:
Line 894  do Line 990  do
       case OP_QUERYI:        case OP_QUERYI:
       case OP_MINQUERYI:        case OP_MINQUERYI:
       case OP_POSQUERYI:        case OP_POSQUERYI:
      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
       break;        break;
   
       /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
Line 902  do Line 998  do
       case OP_UPTO:        case OP_UPTO:
       case OP_MINUPTO:        case OP_MINUPTO:
       case OP_POSUPTO:        case OP_POSUPTO:
      tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
       break;        break;
   
       case OP_UPTOI:        case OP_UPTOI:
       case OP_MINUPTOI:        case OP_MINUPTOI:
       case OP_POSUPTOI:        case OP_POSUPTOI:
      tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
       break;        break;
   
       /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
   
       case OP_EXACT:        case OP_EXACT:
      tcode += 2;      tcode += IMM2_SIZE;
       /* Fall through */        /* Fall through */
       case OP_CHAR:        case OP_CHAR:
       case OP_PLUS:        case OP_PLUS:
       case OP_MINPLUS:        case OP_MINPLUS:
       case OP_POSPLUS:        case OP_POSPLUS:
      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
       case OP_EXACTI:        case OP_EXACTI:
      tcode += 2;      tcode += IMM2_SIZE;
       /* Fall through */        /* Fall through */
       case OP_CHARI:        case OP_CHARI:
       case OP_PLUSI:        case OP_PLUSI:
       case OP_MINPLUSI:        case OP_MINPLUSI:
       case OP_POSPLUSI:        case OP_POSPLUSI:
      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
Line 942  do Line 1038  do
       identical. */        identical. */
   
       case OP_HSPACE:        case OP_HSPACE:
      SET_BIT(0x09);      SET_BIT(CHAR_HT);
      SET_BIT(0x20);      SET_BIT(CHAR_SPACE);
      if (utf8)#ifdef SUPPORT_UTF
       if (utf)
         {          {
   #ifdef COMPILE_PCRE8
         SET_BIT(0xC2);  /* For U+00A0 */          SET_BIT(0xC2);  /* For U+00A0 */
         SET_BIT(0xE1);  /* For U+1680, U+180E */          SET_BIT(0xE1);  /* For U+1680, U+180E */
         SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
         SET_BIT(0xE3);  /* For U+3000 */          SET_BIT(0xE3);  /* For U+3000 */
   #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
           SET_BIT(0xA0);
           SET_BIT(0xFF);  /* For characters > 255 */
   #endif  /* COMPILE_PCRE[8|16|32] */
         }          }
      else SET_BIT(0xA0);      else
 #endif /* SUPPORT_UTF */
         {
 #ifndef EBCDIC
         SET_BIT(0xA0);
 #endif  /* Not EBCDIC */
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
         SET_BIT(0xFF);  /* For characters > 255 */
 #endif  /* COMPILE_PCRE[16|32] */
         }
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
       case OP_ANYNL:        case OP_ANYNL:
       case OP_VSPACE:        case OP_VSPACE:
      SET_BIT(0x0A);      SET_BIT(CHAR_LF);
      SET_BIT(0x0B);      SET_BIT(CHAR_VT);
      SET_BIT(0x0C);      SET_BIT(CHAR_FF);
      SET_BIT(0x0D);      SET_BIT(CHAR_CR);
      if (utf8)#ifdef SUPPORT_UTF
       if (utf)
         {          {
   #ifdef COMPILE_PCRE8
         SET_BIT(0xC2);  /* For U+0085 */          SET_BIT(0xC2);  /* For U+0085 */
         SET_BIT(0xE2);  /* For U+2028, U+2029 */          SET_BIT(0xE2);  /* For U+2028, U+2029 */
   #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
           SET_BIT(CHAR_NEL);
           SET_BIT(0xFF);  /* For characters > 255 */
   #endif  /* COMPILE_PCRE[8|16|32] */
         }          }
      else SET_BIT(0x85);      else
 #endif /* SUPPORT_UTF */
         {
         SET_BIT(CHAR_NEL);
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
         SET_BIT(0xFF);  /* For characters > 255 */
 #endif
         }
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
Line 986  do Line 1110  do
       break;        break;
   
       /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to
      ensure it is set as not whitespace. */      ensure it is set as not whitespace. Luckily, the code value is the same
       (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
   
       case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
       set_nottype_bits(start_bits, cbit_space, table_limit, cd);        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
Line 994  do Line 1119  do
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
      /* The cbit_space table has vertical tab as whitespace; we have to      /* The cbit_space table has vertical tab as whitespace; we have to not
      not set it from the table. */      set it from the table. Luckily, the code value is the same (0x0b) in
       ASCII and EBCDIC, so we can just adjust the appropriate bit. */
   
       case OP_WHITESPACE:        case OP_WHITESPACE:
       c = start_bits[1];    /* Save in case it was already set */        c = start_bits[1];    /* Save in case it was already set */
Line 1024  do Line 1150  do
       break;        break;
   
       case OP_TYPEEXACT:        case OP_TYPEEXACT:
      tcode += 3;      tcode += 1 + IMM2_SIZE;
       break;        break;
   
       /* Zero or more repeats of character types set the bits and then        /* Zero or more repeats of character types set the bits and then
Line 1033  do Line 1159  do
       case OP_TYPEUPTO:        case OP_TYPEUPTO:
       case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
       case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
      tcode += 2;               /* Fall through */      tcode += IMM2_SIZE;  /* Fall through */
   
       case OP_TYPESTAR:        case OP_TYPESTAR:
       case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
Line 1049  do Line 1175  do
         return SSB_FAIL;          return SSB_FAIL;
   
         case OP_HSPACE:          case OP_HSPACE:
        SET_BIT(0x09);        SET_BIT(CHAR_HT);
        SET_BIT(0x20);        SET_BIT(CHAR_SPACE);
        if (utf8)#ifdef SUPPORT_UTF
         if (utf)
           {            {
   #ifdef COMPILE_PCRE8
           SET_BIT(0xC2);  /* For U+00A0 */            SET_BIT(0xC2);  /* For U+00A0 */
           SET_BIT(0xE1);  /* For U+1680, U+180E */            SET_BIT(0xE1);  /* For U+1680, U+180E */
           SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
           SET_BIT(0xE3);  /* For U+3000 */            SET_BIT(0xE3);  /* For U+3000 */
   #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
             SET_BIT(0xA0);
             SET_BIT(0xFF);  /* For characters > 255 */
   #endif  /* COMPILE_PCRE[8|16|32] */
           }            }
        else SET_BIT(0xA0);        else
 #endif /* SUPPORT_UTF */
 #ifndef EBCDIC
           SET_BIT(0xA0);
 #endif  /* Not EBCDIC */
         break;          break;
   
         case OP_ANYNL:          case OP_ANYNL:
         case OP_VSPACE:          case OP_VSPACE:
        SET_BIT(0x0A);        SET_BIT(CHAR_LF);
        SET_BIT(0x0B);        SET_BIT(CHAR_VT);
        SET_BIT(0x0C);        SET_BIT(CHAR_FF);
        SET_BIT(0x0D);        SET_BIT(CHAR_CR);
        if (utf8)#ifdef SUPPORT_UTF
         if (utf)
           {            {
   #ifdef COMPILE_PCRE8
           SET_BIT(0xC2);  /* For U+0085 */            SET_BIT(0xC2);  /* For U+0085 */
           SET_BIT(0xE2);  /* For U+2028, U+2029 */            SET_BIT(0xE2);  /* For U+2028, U+2029 */
   #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
             SET_BIT(CHAR_NEL);
             SET_BIT(0xFF);  /* For characters > 255 */
   #endif  /* COMPILE_PCRE16 */
           }            }
        else SET_BIT(0x85);        else
 #endif /* SUPPORT_UTF */
           SET_BIT(CHAR_NEL);
         break;          break;
   
         case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
Line 1083  do Line 1227  do
         set_type_bits(start_bits, cbit_digit, table_limit, cd);          set_type_bits(start_bits, cbit_digit, table_limit, cd);
         break;          break;
   
        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we no longer
        ensure it gets set as not whitespace. */        have to play fancy tricks because Perl added VT to its whitespace at
         release 5.18. PCRE added it at release 8.34. */
   
         case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
         set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] |= 0x08;  
         break;          break;
   
         /* The cbit_space table has vertical tab as whitespace; we have to  
         avoid setting it. */  
   
         case OP_WHITESPACE:          case OP_WHITESPACE:
         c = start_bits[1];    /* Save in case it was already set */  
         set_type_bits(start_bits, cbit_space, table_limit, cd);          set_type_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] = (start_bits[1] & ~0x08) | c;  
         break;          break;
   
         case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
Line 1119  do Line 1258  do
       character with a value > 255. */        character with a value > 255. */
   
       case OP_NCLASS:        case OP_NCLASS:
#ifdef SUPPORT_UTF8#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      if (utf8)      if (utf)
         {          {
         start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */          start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */
         memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
         }          }
 #endif  #endif
   #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
         SET_BIT(0xFF);                         /* For characters > 255 */
   #endif
       /* Fall through */        /* Fall through */
   
       case OP_CLASS:        case OP_CLASS:
         {          {
           pcre_uint8 *map;
         tcode++;          tcode++;
           map = (pcre_uint8 *)tcode;
   
         /* In UTF-8 mode, the bits in a bit map correspond to character          /* In UTF-8 mode, the bits in a bit map correspond to character
         values, not to byte values. However, the bit map we are constructing is          values, not to byte values. However, the bit map we are constructing is
Line 1138  do Line 1282  do
         value is > 127. In fact, there are only two possible starting bytes for          value is > 127. In fact, there are only two possible starting bytes for
         characters in the range 128 - 255. */          characters in the range 128 - 255. */
   
#ifdef SUPPORT_UTF8#if defined SUPPORT_UTF && defined COMPILE_PCRE8
        if (utf8)        if (utf)
           {            {
          for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];          for (c = 0; c < 16; c++) start_bits[c] |= map[c];
           for (c = 128; c < 256; c++)            for (c = 128; c < 256; c++)
             {              {
            if ((tcode[c/8] && (1 << (c&7))) != 0)            if ((map[c/8] && (1 << (c&7))) != 0)
               {                {
               int d = (c >> 6) | 0xc0;            /* Set bit for this starter */                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
               start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
Line 1152  do Line 1296  do
               }                }
             }              }
           }            }
   
         /* In non-UTF-8 mode, the two bit maps are completely compatible. */  
   
         else          else
 #endif  #endif
           {            {
          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];          /* In non-UTF-8 mode, the two bit maps are completely compatible. */
           for (c = 0; c < 32; c++) start_bits[c] |= map[c];
           }            }
   
         /* Advance past the bit map, and act on what follows. For a zero          /* Advance past the bit map, and act on what follows. For a zero
         minimum repeat, continue; otherwise stop processing. */          minimum repeat, continue; otherwise stop processing. */
   
        tcode += 32;        tcode += 32 / sizeof(pcre_uchar);
         switch (*tcode)          switch (*tcode)
           {            {
           case OP_CRSTAR:            case OP_CRSTAR:
           case OP_CRMINSTAR:            case OP_CRMINSTAR:
           case OP_CRQUERY:            case OP_CRQUERY:
           case OP_CRMINQUERY:            case OP_CRMINQUERY:
             case OP_CRPOSSTAR:
             case OP_CRPOSQUERY:
           tcode++;            tcode++;
           break;            break;
   
           case OP_CRRANGE:            case OP_CRRANGE:
           case OP_CRMINRANGE:            case OP_CRMINRANGE:
          if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;          case OP_CRPOSRANGE:
           if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
             else try_next = FALSE;              else try_next = FALSE;
           break;            break;
   
Line 1205  return yield; Line 1350  return yield;
 *************************************************/  *************************************************/
   
 /* This function is handed a compiled expression that it must study to produce  /* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. It returns a pcre_extra blockinformation that will speed up the matching. It returns a pcre[16]_extra block
 which then gets handed back to pcre_exec().  which then gets handed back to pcre_exec().
   
 Arguments:  Arguments:
Line 1214  Arguments: Line 1359  Arguments:
   errorptr  points to where to place error messages;    errorptr  points to where to place error messages;
             set NULL unless error              set NULL unless error
   
Returns:    pointer to a pcre_extra block, with study_data filled in and theReturns:    pointer to a pcre[16]_extra block, with study_data filled in and
              appropriate flags set;              the appropriate flags set;
             NULL on error or if no optimization possible              NULL on error or if no optimization possible
 */  */
   
   #if defined COMPILE_PCRE8
 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
 pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
   #elif defined COMPILE_PCRE16
   PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
   pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
   #elif defined COMPILE_PCRE32
   PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
   pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
   #endif
 {  {
 int min;  int min;
 BOOL bits_set = FALSE;  BOOL bits_set = FALSE;
uschar start_bits[32];pcre_uint8 start_bits[32];
pcre_extra *extra = NULL;PUBL(extra) *extra = NULL;
 pcre_study_data *study;  pcre_study_data *study;
const uschar *tables;const pcre_uint8 *tables;
uschar *code;pcre_uchar *code;
 compile_data compile_block;  compile_data compile_block;
const real_pcre *re = (const real_pcre *)external_re;const REAL_PCRE *re = (const REAL_PCRE *)external_re;
   
   
 *errorptr = NULL;  *errorptr = NULL;
   
 if (re == NULL || re->magic_number != MAGIC_NUMBER)  if (re == NULL || re->magic_number != MAGIC_NUMBER)
Line 1240  if (re == NULL || re->magic_number != MAGIC_NUMBER) Line 1394  if (re == NULL || re->magic_number != MAGIC_NUMBER)
   return NULL;    return NULL;
   }    }
   
   if ((re->flags & PCRE_MODE) == 0)
     {
   #if defined COMPILE_PCRE8
     *errorptr = "argument not compiled in 8 bit mode";
   #elif defined COMPILE_PCRE16
     *errorptr = "argument not compiled in 16 bit mode";
   #elif defined COMPILE_PCRE32
     *errorptr = "argument not compiled in 32 bit mode";
   #endif
     return NULL;
     }
   
 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)  if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
   {    {
   *errorptr = "unknown or incorrect option bit(s) set";    *errorptr = "unknown or incorrect option bit(s) set";
   return NULL;    return NULL;
   }    }
   
code = (uschar *)re + re->name_table_offset +code = (pcre_uchar *)re + re->name_table_offset +
   (re->name_count * re->name_entry_size);    (re->name_count * re->name_entry_size);
   
 /* For an anchored pattern, or an unanchored pattern that has a first char, or  /* For an anchored pattern, or an unanchored pattern that has a first char, or
Line 1261  if ((re->options & PCRE_ANCHORED) == 0 && Line 1427  if ((re->options & PCRE_ANCHORED) == 0 &&
   /* Set the character tables in the block that is passed around */    /* Set the character tables in the block that is passed around */
   
   tables = re->tables;    tables = re->tables;
   
   #if defined COMPILE_PCRE8
   if (tables == NULL)    if (tables == NULL)
     (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
     (void *)(&tables));      (void *)(&tables));
   #elif defined COMPILE_PCRE16
     if (tables == NULL)
       (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
       (void *)(&tables));
   #elif defined COMPILE_PCRE32
     if (tables == NULL)
       (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
       (void *)(&tables));
   #endif
   
   compile_block.lcc = tables + lcc_offset;    compile_block.lcc = tables + lcc_offset;
   compile_block.fcc = tables + fcc_offset;    compile_block.fcc = tables + fcc_offset;
Line 1272  if ((re->options & PCRE_ANCHORED) == 0 && Line 1449  if ((re->options & PCRE_ANCHORED) == 0 &&
   
   /* See if we can find a fixed set of initial characters for the pattern. */    /* See if we can find a fixed set of initial characters for the pattern. */
   
  memset(start_bits, 0, 32 * sizeof(uschar));  memset(start_bits, 0, 32 * sizeof(pcre_uint8));
   rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
     &compile_block);      &compile_block);
   bits_set = rc == SSB_DONE;    bits_set = rc == SSB_DONE;
Line 1285  if ((re->options & PCRE_ANCHORED) == 0 && Line 1462  if ((re->options & PCRE_ANCHORED) == 0 &&
   
 /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
   
switch(min = find_minlength(code, code, re->options, 0))switch(min = find_minlength(re, code, code, re->options, 0))
   {    {
   case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
   case -3: *errorptr = "internal error: opcode not recognized"; return NULL;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
Line 1293  switch(min = find_minlength(code, code, re->options, 0 Line 1470  switch(min = find_minlength(code, code, re->options, 0
   }    }
   
 /* If a set of starting bytes has been identified, or if the minimum length is  /* If a set of starting bytes has been identified, or if the minimum length is
greater than zero, or if JIT optimization has been requested, get a pcre_extragreater than zero, or if JIT optimization has been requested, or if
block and a pcre_study_data block. The study data is put in the latter, whichPCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a
is pointed to by the former, which may also get additional data set later bypcre_study_data block. The study data is put in the latter, which is pointed to
the calling program. At the moment, the size of pcre_study_data is fixed. Weby the former, which may also get additional data set later by the calling
nevertheless save it in a field for returning via the pcre_fullinfo() functionprogram. At the moment, the size of pcre_study_data is fixed. We nevertheless
so that if it becomes variable in the future, we don't have to change thatsave it in a field for returning via the pcre_fullinfo() function so that if it
code. */becomes variable in the future, we don't have to change that code. */
   
if (bits_set || min > 0if (bits_set || min > 0 || (options & (
 #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
    || (options & PCRE_STUDY_JIT_COMPILE) != 0    PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
 #endif  #endif
  )    PCRE_STUDY_EXTRA_NEEDED)) != 0)
   {    {
  extra = (pcre_extra *)(pcre_malloc)  extra = (PUBL(extra) *)(PUBL(malloc))
    (sizeof(pcre_extra) + sizeof(pcre_study_data));    (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
   if (extra == NULL)    if (extra == NULL)
     {      {
     *errorptr = "failed to get memory";      *errorptr = "failed to get memory";
     return NULL;      return NULL;
     }      }
   
  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));  study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
   extra->flags = PCRE_EXTRA_STUDY_DATA;    extra->flags = PCRE_EXTRA_STUDY_DATA;
   extra->study_data = study;    extra->study_data = study;
   
Line 1331  if (bits_set || min > 0 Line 1509  if (bits_set || min > 0
     study->flags |= PCRE_STUDY_MAPPED;      study->flags |= PCRE_STUDY_MAPPED;
     memcpy(study->start_bits, start_bits, sizeof(start_bits));      memcpy(study->start_bits, start_bits, sizeof(start_bits));
     }      }
  else memset(study->start_bits, 0, 32 * sizeof(uschar));  else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
   
   #ifdef PCRE_DEBUG
     if (bits_set)
       {
       pcre_uint8 *ptr = start_bits;
       int i;
   
       printf("Start bits:\n");
       for (i = 0; i < 32; i++)
         printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
       }
   #endif
   
   /* Always set the minlength value in the block, because the JIT compiler    /* Always set the minlength value in the block, because the JIT compiler
   makes use of it. However, don't set the bit unless the length is greater than    makes use of it. However, don't set the bit unless the length is greater than
   zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time    zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
Line 1347  if (bits_set || min > 0 Line 1537  if (bits_set || min > 0
   
   /* If JIT support was compiled and requested, attempt the JIT compilation.    /* If JIT support was compiled and requested, attempt the JIT compilation.
   If no starting bytes were found, and the minimum length is zero, and JIT    If no starting bytes were found, and the minimum length is zero, and JIT
  compilation fails, abandon the extra block and return NULL. */  compilation fails, abandon the extra block and return NULL, unless
   PCRE_STUDY_EXTRA_NEEDED is set. */
   
 #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
   extra->executable_jit = NULL;    extra->executable_jit = NULL;
  if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra);  if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
  if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)    PRIV(jit_compile)(re, extra, JIT_COMPILE);
   if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
     PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
   if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
     PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
 
   if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
       (options & PCRE_STUDY_EXTRA_NEEDED) == 0)
     {      {
   #if defined COMPILE_PCRE8
     pcre_free_study(extra);      pcre_free_study(extra);
   #elif defined COMPILE_PCRE16
       pcre16_free_study(extra);
   #elif defined COMPILE_PCRE32
       pcre32_free_study(extra);
   #endif
     extra = NULL;      extra = NULL;
     }      }
 #endif  #endif
Line 1370  return extra; Line 1574  return extra;
   
 /* This function frees the memory that was obtained by pcre_study().  /* This function frees the memory that was obtained by pcre_study().
   
Argument:   a pointer to the pcre_extra blockArgument:   a pointer to the pcre[16]_extra block
 Returns:    nothing  Returns:    nothing
 */  */
   
   #if defined COMPILE_PCRE8
 PCRE_EXP_DEFN void  PCRE_EXP_DEFN void
 pcre_free_study(pcre_extra *extra)  pcre_free_study(pcre_extra *extra)
   #elif defined COMPILE_PCRE16
   PCRE_EXP_DEFN void
   pcre16_free_study(pcre16_extra *extra)
   #elif defined COMPILE_PCRE32
   PCRE_EXP_DEFN void
   pcre32_free_study(pcre32_extra *extra)
   #endif
 {  {
   if (extra == NULL)
     return;
 #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&  if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
      extra->executable_jit != NULL)       extra->executable_jit != NULL)
  _pcre_jit_free(extra->executable_jit);  PRIV(jit_free)(extra->executable_jit);
 #endif  #endif
pcre_free(extra);PUBL(free)(extra);
 }  }
   
 /* End of pcre_study.c */  /* End of pcre_study.c */

Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.5


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>