Diff for /embedaddon/pcre/pcre_study.c between versions 1.1.1.1 and 1.1.1.3

version 1.1.1.1, 2012/02/21 23:05:51 version 1.1.1.3, 2012/10/09 09:19:17
Line 6 Line 6
 and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
   
                        Written by Philip Hazel                         Written by Philip Hazel
           Copyright (c) 1997-2010 University of Cambridge           Copyright (c) 1997-2012 University of Cambridge
   
 -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
Line 78  Returns:   the minimum length Line 78  Returns:   the minimum length
 */  */
   
 static int  static int
find_minlength(const uschar *code, const uschar *startcode, int options,find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
   int recurse_depth)    int recurse_depth)
 {  {
 int length = -1;  int length = -1;
BOOL utf8 = (options & PCRE_UTF8) != 0;/* PCRE_UTF16 has the same value as PCRE_UTF8. */
 BOOL utf = (options & PCRE_UTF8) != 0;
 BOOL had_recurse = FALSE;  BOOL had_recurse = FALSE;
 register int branchlength = 0;  register int branchlength = 0;
register uschar *cc = (uschar *)code + 1 + LINK_SIZE;register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
   
 if (*code == OP_CBRA || *code == OP_SCBRA ||  if (*code == OP_CBRA || *code == OP_SCBRA ||
    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2;    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
   
 /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
 branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
Line 96  branch, check the length against that of the other bra Line 97  branch, check the length against that of the other bra
 for (;;)  for (;;)
   {    {
   int d, min;    int d, min;
  uschar *cs, *ce;  pcre_uchar *cs, *ce;
   register int op = *cc;    register int op = *cc;
   
   switch (op)    switch (op)
Line 189  for (;;) Line 190  for (;;)
     case OP_DOLLM:      case OP_DOLLM:
     case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
     case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
    cc += _pcre_OP_lengths[*cc];    cc += PRIV(OP_lengths)[*cc];
     break;      break;
   
     /* Skip over a subpattern that has a {0} or {0,x} quantifier */      /* Skip over a subpattern that has a {0} or {0,x} quantifier */
Line 198  for (;;) Line 199  for (;;)
     case OP_BRAMINZERO:      case OP_BRAMINZERO:
     case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
     case OP_SKIPZERO:      case OP_SKIPZERO:
    cc += _pcre_OP_lengths[*cc];    cc += PRIV(OP_lengths)[*cc];
     do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
     cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
     break;      break;
Line 223  for (;;) Line 224  for (;;)
     case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
     branchlength++;      branchlength++;
     cc += 2;      cc += 2;
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif  #endif
     break;      break;
   
Line 243  for (;;) Line 244  for (;;)
     case OP_NOTEXACT:      case OP_NOTEXACT:
     case OP_NOTEXACTI:      case OP_NOTEXACTI:
     branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
    cc += 4;    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif  #endif
     break;      break;
   
     case OP_TYPEEXACT:      case OP_TYPEEXACT:
     branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
    cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;    cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
       || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
     break;      break;
   
     /* Handle single-char non-literal matchers */      /* Handle single-char non-literal matchers */
Line 291  for (;;) Line 293  for (;;)
     appear, but leave the code, just in case.) */      appear, but leave the code, just in case.) */
   
     case OP_ANYBYTE:      case OP_ANYBYTE:
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8) return -1;    if (utf) return -1;
 #endif  #endif
     branchlength++;      branchlength++;
     cc++;      cc++;
Line 308  for (;;) Line 310  for (;;)
     case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
     case OP_TYPEPOSQUERY:      case OP_TYPEPOSQUERY:
     if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;      if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
    cc += _pcre_OP_lengths[op];    cc += PRIV(OP_lengths)[op];
     break;      break;
   
     case OP_TYPEUPTO:      case OP_TYPEUPTO:
     case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
     case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;    if (cc[1 + IMM2_SIZE] == OP_PROP
    cc += _pcre_OP_lengths[op];      || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
     cc += PRIV(OP_lengths)[op];
     break;      break;
   
     /* Check a class for variable quantification */      /* Check a class for variable quantification */
   
#ifdef SUPPORT_UTF8#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:      case OP_XCLASS:
    cc += GET(cc, 1) - 33;    cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
     /* Fall through */      /* Fall through */
 #endif  #endif
   
     case OP_CLASS:      case OP_CLASS:
     case OP_NCLASS:      case OP_NCLASS:
    cc += 33;    cc += PRIV(OP_lengths)[OP_CLASS];
   
     switch (*cc)      switch (*cc)
       {        {
Line 347  for (;;) Line 350  for (;;)
       case OP_CRRANGE:        case OP_CRRANGE:
       case OP_CRMINRANGE:        case OP_CRMINRANGE:
       branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
      cc += 5;      cc += 1 + 2 * IMM2_SIZE;
       break;        break;
   
       default:        default:
Line 372  for (;;) Line 375  for (;;)
     case OP_REFI:      case OP_REFI:
     if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
       {        {
      ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));      ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
       if (cs == NULL) return -2;        if (cs == NULL) return -2;
       do ce += GET(ce, 1); while (*ce == OP_ALT);        do ce += GET(ce, 1); while (*ce == OP_ALT);
       if (cc > cs && cc < ce)        if (cc > cs && cc < ce)
Line 386  for (;;) Line 389  for (;;)
         }          }
       }        }
     else d = 0;      else d = 0;
    cc += 3;    cc += 1 + IMM2_SIZE;
   
     /* Handle repeated back references */      /* Handle repeated back references */
   
Line 409  for (;;) Line 412  for (;;)
       case OP_CRRANGE:        case OP_CRRANGE:
       case OP_CRMINRANGE:        case OP_CRMINRANGE:
       min = GET2(cc, 1);        min = GET2(cc, 1);
      cc += 5;      cc += 1 + 2 * IMM2_SIZE;
       break;        break;
   
       default:        default:
Line 424  for (;;) Line 427  for (;;)
     caught by a recursion depth count. */      caught by a recursion depth count. */
   
     case OP_RECURSE:      case OP_RECURSE:
    cs = ce = (uschar *)startcode + GET(cc, 1);    cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
     do ce += GET(ce, 1); while (*ce == OP_ALT);      do ce += GET(ce, 1); while (*ce == OP_ALT);
     if ((cc > cs && cc < ce) || recurse_depth > 10)      if ((cc > cs && cc < ce) || recurse_depth > 10)
       had_recurse = TRUE;        had_recurse = TRUE;
Line 482  for (;;) Line 485  for (;;)
     case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
     case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
   
    cc += _pcre_OP_lengths[op];    cc += PRIV(OP_lengths)[op];
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif  #endif
     break;      break;
   
Line 494  for (;;) Line 497  for (;;)
     case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
     case OP_SKIP_ARG:      case OP_SKIP_ARG:
     case OP_THEN_ARG:      case OP_THEN_ARG:
    cc += _pcre_OP_lengths[op] + cc[1];    cc += PRIV(OP_lengths)[op] + cc[1];
     break;      break;
   
     /* The remaining opcodes are just skipped over. */      /* The remaining opcodes are just skipped over. */
Line 506  for (;;) Line 509  for (;;)
     case OP_SET_SOM:      case OP_SET_SOM:
     case OP_SKIP:      case OP_SKIP:
     case OP_THEN:      case OP_THEN:
    cc += _pcre_OP_lengths[op];    cc += PRIV(OP_lengths)[op];
     break;      break;
   
     /* This should not occur: we list all opcodes explicitly so that when      /* This should not occur: we list all opcodes explicitly so that when
Line 535  Arguments: Line 538  Arguments:
   p             points to the character    p             points to the character
   caseless      the caseless flag    caseless      the caseless flag
   cd            the block with char table pointers    cd            the block with char table pointers
  utf8          TRUE for UTF-8 mode  utf           TRUE for UTF-8 / UTF-16 mode
   
 Returns:        pointer after the character  Returns:        pointer after the character
 */  */
   
static const uschar *static const pcre_uchar *
set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
  compile_data *cd, BOOL utf8)  compile_data *cd, BOOL utf)
 {  {
 unsigned int c = *p;  unsigned int c = *p;
   
   #ifdef COMPILE_PCRE8
 SET_BIT(c);  SET_BIT(c);
   
#ifdef SUPPORT_UTF8#ifdef SUPPORT_UTF
if (utf8 && c > 127)if (utf && c > 127)
   {    {
   GETCHARINC(c, p);    GETCHARINC(c, p);
 #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
   if (caseless)    if (caseless)
     {      {
    uschar buff[8];    pcre_uchar buff[6];
     c = UCD_OTHERCASE(c);      c = UCD_OTHERCASE(c);
    (void)_pcre_ord2utf8(c, buff);    (void)PRIV(ord2utf)(c, buff);
     SET_BIT(buff[0]);      SET_BIT(buff[0]);
     }      }
 #endif  #endif
Line 569  if (utf8 && c > 127) Line 573  if (utf8 && c > 127)
   
 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
 return p + 1;  return p + 1;
   #endif
   
   #ifdef COMPILE_PCRE16
   if (c > 0xff)
     {
     c = 0xff;
     caseless = FALSE;
     }
   SET_BIT(c);
   
   #ifdef SUPPORT_UTF
   if (utf && c > 127)
     {
     GETCHARINC(c, p);
   #ifdef SUPPORT_UCP
     if (caseless)
       {
       c = UCD_OTHERCASE(c);
       if (c > 0xff)
         c = 0xff;
       SET_BIT(c);
       }
   #endif
     return p;
     }
   #endif
   
   if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
   return p + 1;
   #endif
 }  }
   
   
Line 594  Returns:         nothing Line 628  Returns:         nothing
 */  */
   
 static void  static void
set_type_bits(uschar *start_bits, int cbit_type, int table_limit,set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
   compile_data *cd)    compile_data *cd)
 {  {
 register int c;  register int c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
   #if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (table_limit == 32) return;  if (table_limit == 32) return;
 for (c = 128; c < 256; c++)  for (c = 128; c < 256; c++)
   {    {
   if ((cd->cbits[c/8] & (1 << (c&7))) != 0)    if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
     {      {
    uschar buff[8];    pcre_uchar buff[6];
    (void)_pcre_ord2utf8(c, buff);    (void)PRIV(ord2utf)(c, buff);
     SET_BIT(buff[0]);      SET_BIT(buff[0]);
     }      }
   }    }
   #endif
 }  }
   
   
Line 634  Returns:         nothing Line 670  Returns:         nothing
 */  */
   
 static void  static void
set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
   compile_data *cd)    compile_data *cd)
 {  {
 register int c;  register int c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
   #if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
   #endif
 }  }
   
   
Line 659  function fails unless the result is SSB_DONE. Line 697  function fails unless the result is SSB_DONE.
 Arguments:  Arguments:
   code         points to an expression    code         points to an expression
   start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
  utf8         TRUE if in UTF-8 mode  utf          TRUE if in UTF-8 / UTF-16 mode
   cd           the block with char table pointers    cd           the block with char table pointers
   
 Returns:       SSB_FAIL     => Failed to find any starting bytes  Returns:       SSB_FAIL     => Failed to find any starting bytes
Line 669  Returns:       SSB_FAIL     => Failed to find any star Line 707  Returns:       SSB_FAIL     => Failed to find any star
 */  */
   
 static int  static int
set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
   compile_data *cd)    compile_data *cd)
 {  {
 register int c;  register int c;
 int yield = SSB_DONE;  int yield = SSB_DONE;
int table_limit = utf8? 16:32;#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 int table_limit = utf? 16:32;
 #else
 int table_limit = 32;
 #endif
   
 #if 0  #if 0
 /* ========================================================================= */  /* ========================================================================= */
Line 696  volatile int dummy; Line 738  volatile int dummy;
 do  do
   {    {
   BOOL try_next = TRUE;    BOOL try_next = TRUE;
  const uschar *tcode = code + 1 + LINK_SIZE;  const pcre_uchar *tcode = code + 1 + LINK_SIZE;
   
   if (*code == OP_CBRA || *code == OP_SCBRA ||    if (*code == OP_CBRA || *code == OP_SCBRA ||
      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
   
   while (try_next)    /* Loop for items in this branch */    while (try_next)    /* Loop for items in this branch */
     {      {
Line 785  do Line 827  do
       case OP_SOM:        case OP_SOM:
       case OP_THEN:        case OP_THEN:
       case OP_THEN_ARG:        case OP_THEN_ARG:
   #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       case OP_XCLASS:        case OP_XCLASS:
   #endif
       return SSB_FAIL;        return SSB_FAIL;
   
       /* We can ignore word boundary tests. */        /* We can ignore word boundary tests. */
Line 811  do Line 855  do
       case OP_ONCE:        case OP_ONCE:
       case OP_ONCE_NC:        case OP_ONCE_NC:
       case OP_ASSERT:        case OP_ASSERT:
      rc = set_start_bits(tcode, start_bits, utf8, cd);      rc = set_start_bits(tcode, start_bits, utf, cd);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
       if (rc == SSB_DONE) try_next = FALSE; else        if (rc == SSB_DONE) try_next = FALSE; else
         {          {
Line 858  do Line 902  do
       case OP_BRAZERO:        case OP_BRAZERO:
       case OP_BRAMINZERO:        case OP_BRAMINZERO:
       case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
      rc = set_start_bits(++tcode, start_bits, utf8, cd);      rc = set_start_bits(++tcode, start_bits, utf, cd);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
 /* =========================================================================  /* =========================================================================
       See the comment at the head of this function concerning the next line,        See the comment at the head of this function concerning the next line,
Line 885  do Line 929  do
       case OP_QUERY:        case OP_QUERY:
       case OP_MINQUERY:        case OP_MINQUERY:
       case OP_POSQUERY:        case OP_POSQUERY:
      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
       break;        break;
   
       case OP_STARI:        case OP_STARI:
Line 894  do Line 938  do
       case OP_QUERYI:        case OP_QUERYI:
       case OP_MINQUERYI:        case OP_MINQUERYI:
       case OP_POSQUERYI:        case OP_POSQUERYI:
      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
       break;        break;
   
       /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
Line 902  do Line 946  do
       case OP_UPTO:        case OP_UPTO:
       case OP_MINUPTO:        case OP_MINUPTO:
       case OP_POSUPTO:        case OP_POSUPTO:
      tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
       break;        break;
   
       case OP_UPTOI:        case OP_UPTOI:
       case OP_MINUPTOI:        case OP_MINUPTOI:
       case OP_POSUPTOI:        case OP_POSUPTOI:
      tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
       break;        break;
   
       /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
   
       case OP_EXACT:        case OP_EXACT:
      tcode += 2;      tcode += IMM2_SIZE;
       /* Fall through */        /* Fall through */
       case OP_CHAR:        case OP_CHAR:
       case OP_PLUS:        case OP_PLUS:
       case OP_MINPLUS:        case OP_MINPLUS:
       case OP_POSPLUS:        case OP_POSPLUS:
      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
       case OP_EXACTI:        case OP_EXACTI:
      tcode += 2;      tcode += IMM2_SIZE;
       /* Fall through */        /* Fall through */
       case OP_CHARI:        case OP_CHARI:
       case OP_PLUSI:        case OP_PLUSI:
       case OP_MINPLUSI:        case OP_MINPLUSI:
       case OP_POSPLUSI:        case OP_POSPLUSI:
      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
Line 944  do Line 988  do
       case OP_HSPACE:        case OP_HSPACE:
       SET_BIT(0x09);        SET_BIT(0x09);
       SET_BIT(0x20);        SET_BIT(0x20);
      if (utf8)#ifdef SUPPORT_UTF
       if (utf)
         {          {
   #ifdef COMPILE_PCRE8
         SET_BIT(0xC2);  /* For U+00A0 */          SET_BIT(0xC2);  /* For U+00A0 */
         SET_BIT(0xE1);  /* For U+1680, U+180E */          SET_BIT(0xE1);  /* For U+1680, U+180E */
         SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
         SET_BIT(0xE3);  /* For U+3000 */          SET_BIT(0xE3);  /* For U+3000 */
   #endif
   #ifdef COMPILE_PCRE16
           SET_BIT(0xA0);
           SET_BIT(0xFF);  /* For characters > 255 */
   #endif
         }          }
      else SET_BIT(0xA0);      else
 #endif /* SUPPORT_UTF */
         {
         SET_BIT(0xA0);
 #ifdef COMPILE_PCRE16
         SET_BIT(0xFF);  /* For characters > 255 */
 #endif
         }
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
Line 961  do Line 1019  do
       SET_BIT(0x0B);        SET_BIT(0x0B);
       SET_BIT(0x0C);        SET_BIT(0x0C);
       SET_BIT(0x0D);        SET_BIT(0x0D);
      if (utf8)#ifdef SUPPORT_UTF
       if (utf)
         {          {
   #ifdef COMPILE_PCRE8
         SET_BIT(0xC2);  /* For U+0085 */          SET_BIT(0xC2);  /* For U+0085 */
         SET_BIT(0xE2);  /* For U+2028, U+2029 */          SET_BIT(0xE2);  /* For U+2028, U+2029 */
   #endif
   #ifdef COMPILE_PCRE16
           SET_BIT(0x85);
           SET_BIT(0xFF);  /* For characters > 255 */
   #endif
         }          }
      else SET_BIT(0x85);      else
 #endif /* SUPPORT_UTF */
         {
         SET_BIT(0x85);
 #ifdef COMPILE_PCRE16
         SET_BIT(0xFF);  /* For characters > 255 */
 #endif
         }
       try_next = FALSE;        try_next = FALSE;
       break;        break;
   
Line 1024  do Line 1096  do
       break;        break;
   
       case OP_TYPEEXACT:        case OP_TYPEEXACT:
      tcode += 3;      tcode += 1 + IMM2_SIZE;
       break;        break;
   
       /* Zero or more repeats of character types set the bits and then        /* Zero or more repeats of character types set the bits and then
Line 1033  do Line 1105  do
       case OP_TYPEUPTO:        case OP_TYPEUPTO:
       case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
       case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
      tcode += 2;               /* Fall through */      tcode += IMM2_SIZE;  /* Fall through */
   
       case OP_TYPESTAR:        case OP_TYPESTAR:
       case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
Line 1051  do Line 1123  do
         case OP_HSPACE:          case OP_HSPACE:
         SET_BIT(0x09);          SET_BIT(0x09);
         SET_BIT(0x20);          SET_BIT(0x20);
        if (utf8)#ifdef SUPPORT_UTF
         if (utf)
           {            {
   #ifdef COMPILE_PCRE8
           SET_BIT(0xC2);  /* For U+00A0 */            SET_BIT(0xC2);  /* For U+00A0 */
           SET_BIT(0xE1);  /* For U+1680, U+180E */            SET_BIT(0xE1);  /* For U+1680, U+180E */
           SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
           SET_BIT(0xE3);  /* For U+3000 */            SET_BIT(0xE3);  /* For U+3000 */
   #endif
   #ifdef COMPILE_PCRE16
             SET_BIT(0xA0);
             SET_BIT(0xFF);  /* For characters > 255 */
   #endif
           }            }
        else SET_BIT(0xA0);        else
 #endif /* SUPPORT_UTF */
           SET_BIT(0xA0);
         break;          break;
   
         case OP_ANYNL:          case OP_ANYNL:
Line 1067  do Line 1148  do
         SET_BIT(0x0B);          SET_BIT(0x0B);
         SET_BIT(0x0C);          SET_BIT(0x0C);
         SET_BIT(0x0D);          SET_BIT(0x0D);
        if (utf8)#ifdef SUPPORT_UTF
         if (utf)
           {            {
   #ifdef COMPILE_PCRE8
           SET_BIT(0xC2);  /* For U+0085 */            SET_BIT(0xC2);  /* For U+0085 */
           SET_BIT(0xE2);  /* For U+2028, U+2029 */            SET_BIT(0xE2);  /* For U+2028, U+2029 */
   #endif
   #ifdef COMPILE_PCRE16
             SET_BIT(0x85);
             SET_BIT(0xFF);  /* For characters > 255 */
   #endif
           }            }
        else SET_BIT(0x85);        else
 #endif /* SUPPORT_UTF */
           SET_BIT(0x85);
         break;          break;
   
         case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
Line 1119  do Line 1209  do
       character with a value > 255. */        character with a value > 255. */
   
       case OP_NCLASS:        case OP_NCLASS:
#ifdef SUPPORT_UTF8#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      if (utf8)      if (utf)
         {          {
         start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc7 */          start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc7 */
         memset(start_bits+25, 0xff, 7);      /* Bits for 0xc8 - 0xff */          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc8 - 0xff */
         }          }
 #endif  #endif
   #ifdef COMPILE_PCRE16
         SET_BIT(0xFF);                         /* For characters > 255 */
   #endif
       /* Fall through */        /* Fall through */
   
       case OP_CLASS:        case OP_CLASS:
         {          {
           pcre_uint8 *map;
         tcode++;          tcode++;
           map = (pcre_uint8 *)tcode;
   
         /* In UTF-8 mode, the bits in a bit map correspond to character          /* In UTF-8 mode, the bits in a bit map correspond to character
         values, not to byte values. However, the bit map we are constructing is          values, not to byte values. However, the bit map we are constructing is
Line 1138  do Line 1233  do
         value is > 127. In fact, there are only two possible starting bytes for          value is > 127. In fact, there are only two possible starting bytes for
         characters in the range 128 - 255. */          characters in the range 128 - 255. */
   
#ifdef SUPPORT_UTF8#if defined SUPPORT_UTF && defined COMPILE_PCRE8
        if (utf8)        if (utf)
           {            {
          for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];          for (c = 0; c < 16; c++) start_bits[c] |= map[c];
           for (c = 128; c < 256; c++)            for (c = 128; c < 256; c++)
             {              {
            if ((tcode[c/8] && (1 << (c&7))) != 0)            if ((map[c/8] && (1 << (c&7))) != 0)
               {                {
               int d = (c >> 6) | 0xc0;            /* Set bit for this starter */                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
               start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
Line 1152  do Line 1247  do
               }                }
             }              }
           }            }
   
         /* In non-UTF-8 mode, the two bit maps are completely compatible. */  
   
         else          else
 #endif  #endif
           {            {
          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];          /* In non-UTF-8 mode, the two bit maps are completely compatible. */
           for (c = 0; c < 32; c++) start_bits[c] |= map[c];
           }            }
   
         /* Advance past the bit map, and act on what follows. For a zero          /* Advance past the bit map, and act on what follows. For a zero
         minimum repeat, continue; otherwise stop processing. */          minimum repeat, continue; otherwise stop processing. */
   
        tcode += 32;        tcode += 32 / sizeof(pcre_uchar);
         switch (*tcode)          switch (*tcode)
           {            {
           case OP_CRSTAR:            case OP_CRSTAR:
Line 1176  do Line 1269  do
   
           case OP_CRRANGE:            case OP_CRRANGE:
           case OP_CRMINRANGE:            case OP_CRMINRANGE:
          if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;          if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
             else try_next = FALSE;              else try_next = FALSE;
           break;            break;
   
Line 1205  return yield; Line 1298  return yield;
 *************************************************/  *************************************************/
   
 /* This function is handed a compiled expression that it must study to produce  /* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. It returns a pcre_extra blockinformation that will speed up the matching. It returns a pcre[16]_extra block
 which then gets handed back to pcre_exec().  which then gets handed back to pcre_exec().
   
 Arguments:  Arguments:
Line 1214  Arguments: Line 1307  Arguments:
   errorptr  points to where to place error messages;    errorptr  points to where to place error messages;
             set NULL unless error              set NULL unless error
   
Returns:    pointer to a pcre_extra block, with study_data filled in and theReturns:    pointer to a pcre[16]_extra block, with study_data filled in and
              appropriate flags set;              the appropriate flags set;
             NULL on error or if no optimization possible              NULL on error or if no optimization possible
 */  */
   
   #ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
 pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
   #else
   PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
   pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
   #endif
 {  {
 int min;  int min;
 BOOL bits_set = FALSE;  BOOL bits_set = FALSE;
uschar start_bits[32];pcre_uint8 start_bits[32];
pcre_extra *extra = NULL;PUBL(extra) *extra = NULL;
 pcre_study_data *study;  pcre_study_data *study;
const uschar *tables;const pcre_uint8 *tables;
uschar *code;pcre_uchar *code;
 compile_data compile_block;  compile_data compile_block;
const real_pcre *re = (const real_pcre *)external_re;const REAL_PCRE *re = (const REAL_PCRE *)external_re;
   
 *errorptr = NULL;  *errorptr = NULL;
   
Line 1240  if (re == NULL || re->magic_number != MAGIC_NUMBER) Line 1338  if (re == NULL || re->magic_number != MAGIC_NUMBER)
   return NULL;    return NULL;
   }    }
   
   if ((re->flags & PCRE_MODE) == 0)
     {
   #ifdef COMPILE_PCRE8
     *errorptr = "argument is compiled in 16 bit mode";
   #else
     *errorptr = "argument is compiled in 8 bit mode";
   #endif
     return NULL;
     }
   
 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)  if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
   {    {
   *errorptr = "unknown or incorrect option bit(s) set";    *errorptr = "unknown or incorrect option bit(s) set";
   return NULL;    return NULL;
   }    }
   
code = (uschar *)re + re->name_table_offset +code = (pcre_uchar *)re + re->name_table_offset +
   (re->name_count * re->name_entry_size);    (re->name_count * re->name_entry_size);
   
 /* For an anchored pattern, or an unanchored pattern that has a first char, or  /* For an anchored pattern, or an unanchored pattern that has a first char, or
Line 1261  if ((re->options & PCRE_ANCHORED) == 0 && Line 1369  if ((re->options & PCRE_ANCHORED) == 0 &&
   /* Set the character tables in the block that is passed around */    /* Set the character tables in the block that is passed around */
   
   tables = re->tables;    tables = re->tables;
   
   #ifdef COMPILE_PCRE8
   if (tables == NULL)    if (tables == NULL)
     (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
     (void *)(&tables));      (void *)(&tables));
   #else
     if (tables == NULL)
       (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
       (void *)(&tables));
   #endif
   
   compile_block.lcc = tables + lcc_offset;    compile_block.lcc = tables + lcc_offset;
   compile_block.fcc = tables + fcc_offset;    compile_block.fcc = tables + fcc_offset;
Line 1272  if ((re->options & PCRE_ANCHORED) == 0 && Line 1387  if ((re->options & PCRE_ANCHORED) == 0 &&
   
   /* See if we can find a fixed set of initial characters for the pattern. */    /* See if we can find a fixed set of initial characters for the pattern. */
   
  memset(start_bits, 0, 32 * sizeof(uschar));  memset(start_bits, 0, 32 * sizeof(pcre_uint8));
   rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
     &compile_block);      &compile_block);
   bits_set = rc == SSB_DONE;    bits_set = rc == SSB_DONE;
Line 1293  switch(min = find_minlength(code, code, re->options, 0 Line 1408  switch(min = find_minlength(code, code, re->options, 0
   }    }
   
 /* If a set of starting bytes has been identified, or if the minimum length is  /* If a set of starting bytes has been identified, or if the minimum length is
greater than zero, or if JIT optimization has been requested, get a pcre_extragreater than zero, or if JIT optimization has been requested, get a
block and a pcre_study_data block. The study data is put in the latter, whichpcre[16]_extra block and a pcre_study_data block. The study data is put in the
is pointed to by the former, which may also get additional data set later bylatter, which is pointed to by the former, which may also get additional data
the calling program. At the moment, the size of pcre_study_data is fixed. Weset later by the calling program. At the moment, the size of pcre_study_data
nevertheless save it in a field for returning via the pcre_fullinfo() functionis fixed. We nevertheless save it in a field for returning via the
so that if it becomes variable in the future, we don't have to change thatpcre_fullinfo() function so that if it becomes variable in the future,
code. */we don't have to change that code. */
   
 if (bits_set || min > 0  if (bits_set || min > 0
 #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
    || (options & PCRE_STUDY_JIT_COMPILE) != 0    || (options & (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
                  | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)) != 0
 #endif  #endif
   )    )
   {    {
  extra = (pcre_extra *)(pcre_malloc)  extra = (PUBL(extra) *)(PUBL(malloc))
    (sizeof(pcre_extra) + sizeof(pcre_study_data));    (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
   if (extra == NULL)    if (extra == NULL)
     {      {
     *errorptr = "failed to get memory";      *errorptr = "failed to get memory";
     return NULL;      return NULL;
     }      }
   
  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));  study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
   extra->flags = PCRE_EXTRA_STUDY_DATA;    extra->flags = PCRE_EXTRA_STUDY_DATA;
   extra->study_data = study;    extra->study_data = study;
   
Line 1331  if (bits_set || min > 0 Line 1447  if (bits_set || min > 0
     study->flags |= PCRE_STUDY_MAPPED;      study->flags |= PCRE_STUDY_MAPPED;
     memcpy(study->start_bits, start_bits, sizeof(start_bits));      memcpy(study->start_bits, start_bits, sizeof(start_bits));
     }      }
  else memset(study->start_bits, 0, 32 * sizeof(uschar));  else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
   
   #ifdef PCRE_DEBUG
     if (bits_set)
       {
       pcre_uint8 *ptr = start_bits;
       int i;
   
       printf("Start bits:\n");
       for (i = 0; i < 32; i++)
         printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
       }
   #endif
   
   /* Always set the minlength value in the block, because the JIT compiler    /* Always set the minlength value in the block, because the JIT compiler
   makes use of it. However, don't set the bit unless the length is greater than    makes use of it. However, don't set the bit unless the length is greater than
   zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time    zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
Line 1351  if (bits_set || min > 0 Line 1479  if (bits_set || min > 0
   
 #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
   extra->executable_jit = NULL;    extra->executable_jit = NULL;
  if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra);  if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
     PRIV(jit_compile)(re, extra, JIT_COMPILE);
   if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
     PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
   if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
     PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
 
   if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)    if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
     {      {
   #ifdef COMPILE_PCRE8
     pcre_free_study(extra);      pcre_free_study(extra);
   #endif
   #ifdef COMPILE_PCRE16
       pcre16_free_study(extra);
   #endif
     extra = NULL;      extra = NULL;
     }      }
 #endif  #endif
Line 1370  return extra; Line 1509  return extra;
   
 /* This function frees the memory that was obtained by pcre_study().  /* This function frees the memory that was obtained by pcre_study().
   
Argument:   a pointer to the pcre_extra blockArgument:   a pointer to the pcre[16]_extra block
 Returns:    nothing  Returns:    nothing
 */  */
   
   #ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void  PCRE_EXP_DEFN void
 pcre_free_study(pcre_extra *extra)  pcre_free_study(pcre_extra *extra)
   #else
   PCRE_EXP_DEFN void
   pcre16_free_study(pcre16_extra *extra)
   #endif
 {  {
   if (extra == NULL)
     return;
 #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&  if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
      extra->executable_jit != NULL)       extra->executable_jit != NULL)
  _pcre_jit_free(extra->executable_jit);  PRIV(jit_free)(extra->executable_jit);
 #endif  #endif
pcre_free(extra);PUBL(free)(extra);
 }  }
   
 /* End of pcre_study.c */  /* End of pcre_study.c */

Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.3


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>