--- embedaddon/pcre/pcre_study.c	2012/02/21 23:05:51	1.1.1.1
+++ embedaddon/pcre/pcre_study.c	2012/02/21 23:50:25	1.1.1.2
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -78,17 +78,18 @@ Returns:   the minimum length
 */
 
 static int
-find_minlength(const uschar *code, const uschar *startcode, int options,
+find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
   int recurse_depth)
 {
 int length = -1;
-BOOL utf8 = (options & PCRE_UTF8) != 0;
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+BOOL utf = (options & PCRE_UTF8) != 0;
 BOOL had_recurse = FALSE;
 register int branchlength = 0;
-register uschar *cc = (uschar *)code + 1 + LINK_SIZE;
+register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
 
 if (*code == OP_CBRA || *code == OP_SCBRA ||
-    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2;
+    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
 
 /* Scan along the opcodes for this branch. If we get to the end of the
 branch, check the length against that of the other branches. */
@@ -96,7 +97,7 @@ branch, check the length against that of the other bra
 for (;;)
   {
   int d, min;
-  uschar *cs, *ce;
+  pcre_uchar *cs, *ce;
   register int op = *cc;
 
   switch (op)
@@ -189,7 +190,7 @@ for (;;)
     case OP_DOLLM:
     case OP_NOT_WORD_BOUNDARY:
     case OP_WORD_BOUNDARY:
-    cc += _pcre_OP_lengths[*cc];
+    cc += PRIV(OP_lengths)[*cc];
     break;
 
     /* Skip over a subpattern that has a {0} or {0,x} quantifier */
@@ -198,7 +199,7 @@ for (;;)
     case OP_BRAMINZERO:
     case OP_BRAPOSZERO:
     case OP_SKIPZERO:
-    cc += _pcre_OP_lengths[*cc];
+    cc += PRIV(OP_lengths)[*cc];
     do cc += GET(cc, 1); while (*cc == OP_ALT);
     cc += 1 + LINK_SIZE;
     break;
@@ -223,8 +224,8 @@ for (;;)
     case OP_NOTPOSPLUSI:
     branchlength++;
     cc += 2;
-#ifdef SUPPORT_UTF8
-    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+#ifdef SUPPORT_UTF
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
     break;
 
@@ -243,15 +244,16 @@ for (;;)
     case OP_NOTEXACT:
     case OP_NOTEXACTI:
     branchlength += GET2(cc,1);
-    cc += 4;
-#ifdef SUPPORT_UTF8
-    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+    cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
     break;
 
     case OP_TYPEEXACT:
     branchlength += GET2(cc,1);
-    cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;
+    cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
+      || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
     break;
 
     /* Handle single-char non-literal matchers */
@@ -291,8 +293,8 @@ for (;;)
     appear, but leave the code, just in case.) */
 
     case OP_ANYBYTE:
-#ifdef SUPPORT_UTF8
-    if (utf8) return -1;
+#ifdef SUPPORT_UTF
+    if (utf) return -1;
 #endif
     branchlength++;
     cc++;
@@ -308,27 +310,28 @@ for (;;)
     case OP_TYPEPOSSTAR:
     case OP_TYPEPOSQUERY:
     if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
-    cc += _pcre_OP_lengths[op];
+    cc += PRIV(OP_lengths)[op];
     break;
 
     case OP_TYPEUPTO:
     case OP_TYPEMINUPTO:
     case OP_TYPEPOSUPTO:
-    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
-    cc += _pcre_OP_lengths[op];
+    if (cc[1 + IMM2_SIZE] == OP_PROP
+      || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
+    cc += PRIV(OP_lengths)[op];
     break;
 
     /* Check a class for variable quantification */
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
-    cc += GET(cc, 1) - 33;
+    cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
     /* Fall through */
 #endif
 
     case OP_CLASS:
     case OP_NCLASS:
-    cc += 33;
+    cc += PRIV(OP_lengths)[OP_CLASS];
 
     switch (*cc)
       {
@@ -347,7 +350,7 @@ for (;;)
       case OP_CRRANGE:
       case OP_CRMINRANGE:
       branchlength += GET2(cc,1);
-      cc += 5;
+      cc += 1 + 2 * IMM2_SIZE;
       break;
 
       default:
@@ -372,7 +375,7 @@ for (;;)
     case OP_REFI:
     if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
       {
-      ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
+      ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
       if (cs == NULL) return -2;
       do ce += GET(ce, 1); while (*ce == OP_ALT);
       if (cc > cs && cc < ce)
@@ -386,7 +389,7 @@ for (;;)
         }
       }
     else d = 0;
-    cc += 3;
+    cc += 1 + IMM2_SIZE;
 
     /* Handle repeated back references */
 
@@ -409,7 +412,7 @@ for (;;)
       case OP_CRRANGE:
       case OP_CRMINRANGE:
       min = GET2(cc, 1);
-      cc += 5;
+      cc += 1 + 2 * IMM2_SIZE;
       break;
 
       default:
@@ -424,7 +427,7 @@ for (;;)
     caught by a recursion depth count. */
 
     case OP_RECURSE:
-    cs = ce = (uschar *)startcode + GET(cc, 1);
+    cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
     do ce += GET(ce, 1); while (*ce == OP_ALT);
     if ((cc > cs && cc < ce) || recurse_depth > 10)
       had_recurse = TRUE;
@@ -482,9 +485,9 @@ for (;;)
     case OP_NOTPOSQUERY:
     case OP_NOTPOSQUERYI:
 
-    cc += _pcre_OP_lengths[op];
-#ifdef SUPPORT_UTF8
-    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+    cc += PRIV(OP_lengths)[op];
+#ifdef SUPPORT_UTF
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
     break;
 
@@ -494,7 +497,7 @@ for (;;)
     case OP_PRUNE_ARG:
     case OP_SKIP_ARG:
     case OP_THEN_ARG:
-    cc += _pcre_OP_lengths[op] + cc[1];
+    cc += PRIV(OP_lengths)[op] + cc[1];
     break;
 
     /* The remaining opcodes are just skipped over. */
@@ -506,7 +509,7 @@ for (;;)
     case OP_SET_SOM:
     case OP_SKIP:
     case OP_THEN:
-    cc += _pcre_OP_lengths[op];
+    cc += PRIV(OP_lengths)[op];
     break;
 
     /* This should not occur: we list all opcodes explicitly so that when
@@ -535,29 +538,30 @@ Arguments:
   p             points to the character
   caseless      the caseless flag
   cd            the block with char table pointers
-  utf8          TRUE for UTF-8 mode
+  utf           TRUE for UTF-8 / UTF-16 mode
 
 Returns:        pointer after the character
 */
 
-static const uschar *
-set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,
-  compile_data *cd, BOOL utf8)
+static const pcre_uchar *
+set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
+  compile_data *cd, BOOL utf)
 {
 unsigned int c = *p;
 
+#ifdef COMPILE_PCRE8
 SET_BIT(c);
 
-#ifdef SUPPORT_UTF8
-if (utf8 && c > 127)
+#ifdef SUPPORT_UTF
+if (utf && c > 127)
   {
   GETCHARINC(c, p);
 #ifdef SUPPORT_UCP
   if (caseless)
     {
-    uschar buff[8];
+    pcre_uchar buff[6];
     c = UCD_OTHERCASE(c);
-    (void)_pcre_ord2utf8(c, buff);
+    (void)PRIV(ord2utf)(c, buff);
     SET_BIT(buff[0]);
     }
 #endif
@@ -569,6 +573,36 @@ if (utf8 && c > 127)
 
 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
 return p + 1;
+#endif
+
+#ifdef COMPILE_PCRE16
+if (c > 0xff)
+  {
+  c = 0xff;
+  caseless = FALSE;
+  }
+SET_BIT(c);
+
+#ifdef SUPPORT_UTF
+if (utf && c > 127)
+  {
+  GETCHARINC(c, p);
+#ifdef SUPPORT_UCP
+  if (caseless)
+    {
+    c = UCD_OTHERCASE(c);
+    if (c > 0xff)
+      c = 0xff;
+    SET_BIT(c);
+    }
+#endif
+  return p;
+  }
+#endif
+
+if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
+return p + 1;
+#endif
 }
 
 
@@ -594,21 +628,23 @@ Returns:         nothing
 */
 
 static void
-set_type_bits(uschar *start_bits, int cbit_type, int table_limit,
+set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
   compile_data *cd)
 {
 register int c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (table_limit == 32) return;
 for (c = 128; c < 256; c++)
   {
   if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
     {
-    uschar buff[8];
-    (void)_pcre_ord2utf8(c, buff);
+    pcre_uchar buff[6];
+    (void)PRIV(ord2utf)(c, buff);
     SET_BIT(buff[0]);
     }
   }
+#endif
 }
 
 
@@ -634,12 +670,14 @@ Returns:         nothing
 */
 
 static void
-set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,
+set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
   compile_data *cd)
 {
 register int c;
 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
+#endif
 }
 
 
@@ -659,7 +697,7 @@ function fails unless the result is SSB_DONE.
 Arguments:
   code         points to an expression
   start_bits   points to a 32-byte table, initialized to 0
-  utf8         TRUE if in UTF-8 mode
+  utf          TRUE if in UTF-8 / UTF-16 mode
   cd           the block with char table pointers
 
 Returns:       SSB_FAIL     => Failed to find any starting bytes
@@ -669,12 +707,16 @@ Returns:       SSB_FAIL     => Failed to find any star
 */
 
 static int
-set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,
+set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
   compile_data *cd)
 {
 register int c;
 int yield = SSB_DONE;
-int table_limit = utf8? 16:32;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+int table_limit = utf? 16:32;
+#else
+int table_limit = 32;
+#endif
 
 #if 0
 /* ========================================================================= */
@@ -696,10 +738,10 @@ volatile int dummy;
 do
   {
   BOOL try_next = TRUE;
-  const uschar *tcode = code + 1 + LINK_SIZE;
+  const pcre_uchar *tcode = code + 1 + LINK_SIZE;
 
   if (*code == OP_CBRA || *code == OP_SCBRA ||
-      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;
+      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
 
   while (try_next)    /* Loop for items in this branch */
     {
@@ -785,7 +827,9 @@ do
       case OP_SOM:
       case OP_THEN:
       case OP_THEN_ARG:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       case OP_XCLASS:
+#endif
       return SSB_FAIL;
 
       /* We can ignore word boundary tests. */
@@ -811,7 +855,7 @@ do
       case OP_ONCE:
       case OP_ONCE_NC:
       case OP_ASSERT:
-      rc = set_start_bits(tcode, start_bits, utf8, cd);
+      rc = set_start_bits(tcode, start_bits, utf, cd);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
       if (rc == SSB_DONE) try_next = FALSE; else
         {
@@ -858,7 +902,7 @@ do
       case OP_BRAZERO:
       case OP_BRAMINZERO:
       case OP_BRAPOSZERO:
-      rc = set_start_bits(++tcode, start_bits, utf8, cd);
+      rc = set_start_bits(++tcode, start_bits, utf, cd);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
 /* =========================================================================
       See the comment at the head of this function concerning the next line,
@@ -885,7 +929,7 @@ do
       case OP_QUERY:
       case OP_MINQUERY:
       case OP_POSQUERY:
-      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
+      tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
       break;
 
       case OP_STARI:
@@ -894,7 +938,7 @@ do
       case OP_QUERYI:
       case OP_MINQUERYI:
       case OP_POSQUERYI:
-      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
+      tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
       break;
 
       /* Single-char upto sets the bit and tries the next */
@@ -902,36 +946,36 @@ do
       case OP_UPTO:
       case OP_MINUPTO:
       case OP_POSUPTO:
-      tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);
+      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
       break;
 
       case OP_UPTOI:
       case OP_MINUPTOI:
       case OP_POSUPTOI:
-      tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);
+      tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
       break;
 
       /* At least one single char sets the bit and stops */
 
       case OP_EXACT:
-      tcode += 2;
+      tcode += IMM2_SIZE;
       /* Fall through */
       case OP_CHAR:
       case OP_PLUS:
       case OP_MINPLUS:
       case OP_POSPLUS:
-      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
+      (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
       try_next = FALSE;
       break;
 
       case OP_EXACTI:
-      tcode += 2;
+      tcode += IMM2_SIZE;
       /* Fall through */
       case OP_CHARI:
       case OP_PLUSI:
       case OP_MINPLUSI:
       case OP_POSPLUSI:
-      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
+      (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
       try_next = FALSE;
       break;
 
@@ -944,14 +988,28 @@ do
       case OP_HSPACE:
       SET_BIT(0x09);
       SET_BIT(0x20);
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
+#ifdef COMPILE_PCRE8
         SET_BIT(0xC2);  /* For U+00A0 */
         SET_BIT(0xE1);  /* For U+1680, U+180E */
         SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
         SET_BIT(0xE3);  /* For U+3000 */
+#endif
+#ifdef COMPILE_PCRE16
+        SET_BIT(0xA0);
+        SET_BIT(0xFF);  /* For characters > 255 */
+#endif
         }
-      else SET_BIT(0xA0);
+      else
+#endif /* SUPPORT_UTF */
+        {
+        SET_BIT(0xA0);
+#ifdef COMPILE_PCRE16
+        SET_BIT(0xFF);  /* For characters > 255 */
+#endif
+        }
       try_next = FALSE;
       break;
 
@@ -961,12 +1019,26 @@ do
       SET_BIT(0x0B);
       SET_BIT(0x0C);
       SET_BIT(0x0D);
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
+#ifdef COMPILE_PCRE8
         SET_BIT(0xC2);  /* For U+0085 */
         SET_BIT(0xE2);  /* For U+2028, U+2029 */
+#endif
+#ifdef COMPILE_PCRE16
+        SET_BIT(0x85);
+        SET_BIT(0xFF);  /* For characters > 255 */
+#endif
         }
-      else SET_BIT(0x85);
+      else
+#endif /* SUPPORT_UTF */
+        {
+        SET_BIT(0x85);
+#ifdef COMPILE_PCRE16
+        SET_BIT(0xFF);  /* For characters > 255 */
+#endif
+        }
       try_next = FALSE;
       break;
 
@@ -1024,7 +1096,7 @@ do
       break;
 
       case OP_TYPEEXACT:
-      tcode += 3;
+      tcode += 1 + IMM2_SIZE;
       break;
 
       /* Zero or more repeats of character types set the bits and then
@@ -1033,7 +1105,7 @@ do
       case OP_TYPEUPTO:
       case OP_TYPEMINUPTO:
       case OP_TYPEPOSUPTO:
-      tcode += 2;               /* Fall through */
+      tcode += IMM2_SIZE;  /* Fall through */
 
       case OP_TYPESTAR:
       case OP_TYPEMINSTAR:
@@ -1051,14 +1123,23 @@ do
         case OP_HSPACE:
         SET_BIT(0x09);
         SET_BIT(0x20);
-        if (utf8)
+#ifdef COMPILE_PCRE8
+        if (utf)
           {
+#ifdef COMPILE_PCRE8
           SET_BIT(0xC2);  /* For U+00A0 */
           SET_BIT(0xE1);  /* For U+1680, U+180E */
           SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
           SET_BIT(0xE3);  /* For U+3000 */
+#endif
+#ifdef COMPILE_PCRE16
+          SET_BIT(0xA0);
+          SET_BIT(0xFF);  /* For characters > 255 */
+#endif
           }
-        else SET_BIT(0xA0);
+        else
+#endif /* SUPPORT_UTF */
+          SET_BIT(0xA0);
         break;
 
         case OP_ANYNL:
@@ -1067,12 +1148,21 @@ do
         SET_BIT(0x0B);
         SET_BIT(0x0C);
         SET_BIT(0x0D);
-        if (utf8)
+#ifdef COMPILE_PCRE8
+        if (utf)
           {
+#ifdef COMPILE_PCRE8
           SET_BIT(0xC2);  /* For U+0085 */
           SET_BIT(0xE2);  /* For U+2028, U+2029 */
+#endif
+#ifdef COMPILE_PCRE16
+          SET_BIT(0x85);
+          SET_BIT(0xFF);  /* For characters > 255 */
+#endif
           }
-        else SET_BIT(0x85);
+        else
+#endif /* SUPPORT_UTF */
+          SET_BIT(0x85);
         break;
 
         case OP_NOT_DIGIT:
@@ -1119,18 +1209,23 @@ do
       character with a value > 255. */
 
       case OP_NCLASS:
-#ifdef SUPPORT_UTF8
-      if (utf8)
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+      if (utf)
         {
         start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */
         memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
         }
 #endif
+#ifdef COMPILE_PCRE16
+      SET_BIT(0xFF);                         /* For characters > 255 */
+#endif
       /* Fall through */
 
       case OP_CLASS:
         {
+        pcre_uint8 *map;
         tcode++;
+        map = (pcre_uint8 *)tcode;
 
         /* In UTF-8 mode, the bits in a bit map correspond to character
         values, not to byte values. However, the bit map we are constructing is
@@ -1138,13 +1233,13 @@ do
         value is > 127. In fact, there are only two possible starting bytes for
         characters in the range 128 - 255. */
 
-#ifdef SUPPORT_UTF8
-        if (utf8)
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+        if (utf)
           {
-          for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
+          for (c = 0; c < 16; c++) start_bits[c] |= map[c];
           for (c = 128; c < 256; c++)
             {
-            if ((tcode[c/8] && (1 << (c&7))) != 0)
+            if ((map[c/8] && (1 << (c&7))) != 0)
               {
               int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
               start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
@@ -1152,19 +1247,17 @@ do
               }
             }
           }
-
-        /* In non-UTF-8 mode, the two bit maps are completely compatible. */
-
         else
 #endif
           {
-          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
+          /* In non-UTF-8 mode, the two bit maps are completely compatible. */
+          for (c = 0; c < 32; c++) start_bits[c] |= map[c];
           }
 
         /* Advance past the bit map, and act on what follows. For a zero
         minimum repeat, continue; otherwise stop processing. */
 
-        tcode += 32;
+        tcode += 32 / sizeof(pcre_uchar);
         switch (*tcode)
           {
           case OP_CRSTAR:
@@ -1176,7 +1269,7 @@ do
 
           case OP_CRRANGE:
           case OP_CRMINRANGE:
-          if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
+          if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
             else try_next = FALSE;
           break;
 
@@ -1205,7 +1298,7 @@ return yield;
 *************************************************/
 
 /* This function is handed a compiled expression that it must study to produce
-information that will speed up the matching. It returns a pcre_extra block
+information that will speed up the matching. It returns a pcre[16]_extra block
 which then gets handed back to pcre_exec().
 
 Arguments:
@@ -1214,23 +1307,28 @@ Arguments:
   errorptr  points to where to place error messages;
             set NULL unless error
 
-Returns:    pointer to a pcre_extra block, with study_data filled in and the
-              appropriate flags set;
+Returns:    pointer to a pcre[16]_extra block, with study_data filled in and
+              the appropriate flags set;
             NULL on error or if no optimization possible
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
 pcre_study(const pcre *external_re, int options, const char **errorptr)
+#else
+PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
+pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
+#endif
 {
 int min;
 BOOL bits_set = FALSE;
-uschar start_bits[32];
-pcre_extra *extra = NULL;
+pcre_uint8 start_bits[32];
+PUBL(extra) *extra = NULL;
 pcre_study_data *study;
-const uschar *tables;
-uschar *code;
+const pcre_uint8 *tables;
+pcre_uchar *code;
 compile_data compile_block;
-const real_pcre *re = (const real_pcre *)external_re;
+const REAL_PCRE *re = (const REAL_PCRE *)external_re;
 
 *errorptr = NULL;
 
@@ -1240,13 +1338,23 @@ if (re == NULL || re->magic_number != MAGIC_NUMBER)
   return NULL;
   }
 
+if ((re->flags & PCRE_MODE) == 0)
+  {
+#ifdef COMPILE_PCRE8
+  *errorptr = "argument is compiled in 16 bit mode";
+#else
+  *errorptr = "argument is compiled in 8 bit mode";
+#endif
+  return NULL;
+  }
+
 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
   {
   *errorptr = "unknown or incorrect option bit(s) set";
   return NULL;
   }
 
-code = (uschar *)re + re->name_table_offset +
+code = (pcre_uchar *)re + re->name_table_offset +
   (re->name_count * re->name_entry_size);
 
 /* For an anchored pattern, or an unanchored pattern that has a first char, or
@@ -1261,9 +1369,16 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
   /* Set the character tables in the block that is passed around */
 
   tables = re->tables;
+
+#ifdef COMPILE_PCRE8
   if (tables == NULL)
     (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
     (void *)(&tables));
+#else
+  if (tables == NULL)
+    (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
+    (void *)(&tables));
+#endif
 
   compile_block.lcc = tables + lcc_offset;
   compile_block.fcc = tables + fcc_offset;
@@ -1272,7 +1387,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
 
   /* See if we can find a fixed set of initial characters for the pattern. */
 
-  memset(start_bits, 0, 32 * sizeof(uschar));
+  memset(start_bits, 0, 32 * sizeof(pcre_uint8));
   rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
     &compile_block);
   bits_set = rc == SSB_DONE;
@@ -1293,13 +1408,13 @@ switch(min = find_minlength(code, code, re->options, 0
   }
 
 /* If a set of starting bytes has been identified, or if the minimum length is
-greater than zero, or if JIT optimization has been requested, get a pcre_extra
-block and a pcre_study_data block. The study data is put in the latter, which
-is pointed to by the former, which may also get additional data set later by
-the calling program. At the moment, the size of pcre_study_data is fixed. We
-nevertheless save it in a field for returning via the pcre_fullinfo() function
-so that if it becomes variable in the future, we don't have to change that
-code. */
+greater than zero, or if JIT optimization has been requested, get a
+pcre[16]_extra block and a pcre_study_data block. The study data is put in the
+latter, which is pointed to by the former, which may also get additional data
+set later by the calling program. At the moment, the size of pcre_study_data
+is fixed. We nevertheless save it in a field for returning via the
+pcre_fullinfo() function so that if it becomes variable in the future,
+we don't have to change that code. */
 
 if (bits_set || min > 0
 #ifdef SUPPORT_JIT
@@ -1307,15 +1422,15 @@ if (bits_set || min > 0
 #endif
   )
   {
-  extra = (pcre_extra *)(pcre_malloc)
-    (sizeof(pcre_extra) + sizeof(pcre_study_data));
+  extra = (PUBL(extra) *)(PUBL(malloc))
+    (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
   if (extra == NULL)
     {
     *errorptr = "failed to get memory";
     return NULL;
     }
 
-  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
+  study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
   extra->flags = PCRE_EXTRA_STUDY_DATA;
   extra->study_data = study;
 
@@ -1331,8 +1446,20 @@ if (bits_set || min > 0
     study->flags |= PCRE_STUDY_MAPPED;
     memcpy(study->start_bits, start_bits, sizeof(start_bits));
     }
-  else memset(study->start_bits, 0, 32 * sizeof(uschar));
+  else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
 
+#ifdef PCRE_DEBUG
+  if (bits_set)
+    {
+    pcre_uint8 *ptr = start_bits;
+    int i;
+
+    printf("Start bits:\n");
+    for (i = 0; i < 32; i++)
+      printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
+    }
+#endif
+
   /* Always set the minlength value in the block, because the JIT compiler
   makes use of it. However, don't set the bit unless the length is greater than
   zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
@@ -1351,10 +1478,15 @@ if (bits_set || min > 0
 
 #ifdef SUPPORT_JIT
   extra->executable_jit = NULL;
-  if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra);
+  if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra);
   if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
     {
+#ifdef COMPILE_PCRE8
     pcre_free_study(extra);
+#endif
+#ifdef COMPILE_PCRE16
+    pcre16_free_study(extra);
+#endif
     extra = NULL;
     }
 #endif
@@ -1370,19 +1502,26 @@ return extra;
 
 /* This function frees the memory that was obtained by pcre_study().
 
-Argument:   a pointer to the pcre_extra block
+Argument:   a pointer to the pcre[16]_extra block
 Returns:    nothing
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void
 pcre_free_study(pcre_extra *extra)
+#else
+PCRE_EXP_DEFN void
+pcre16_free_study(pcre16_extra *extra)
+#endif
 {
+if (extra == NULL)
+  return;
 #ifdef SUPPORT_JIT
 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
      extra->executable_jit != NULL)
-  _pcre_jit_free(extra->executable_jit);
+  PRIV(jit_free)(extra->executable_jit);
 #endif
-pcre_free(extra);
+PUBL(free)(extra);
 }
 
 /* End of pcre_study.c */