--- embedaddon/pcre/pcre_dfa_exec.c	2012/02/21 23:50:25	1.1.1.2
+++ embedaddon/pcre/pcre_dfa_exec.c	2013/07/22 08:25:55	1.1.1.4
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the
 below for why this module is different).
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -38,10 +38,9 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */
 
-
 /* This module contains the external function pcre_dfa_exec(), which is an
 alternative matching function that uses a sort of DFA algorithm (not a true
-FSM). This is NOT Perl- compatible, but it has advantages in certain
+FSM). This is NOT Perl-compatible, but it has advantages in certain
 applications. */
 
 
@@ -282,7 +281,7 @@ typedef struct stateblock {
   int data;                       /* Some use extra data */
 } stateblock;
 
-#define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
+#define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
 
 
 #ifdef PCRE_DEBUG
@@ -303,13 +302,13 @@ Returns:       nothing
 static void
 pchars(const pcre_uchar *p, int length, FILE *f)
 {
-int c;
+pcre_uint32 c;
 while (length-- > 0)
   {
   if (isprint(c = *(p++)))
     fprintf(f, "%c", c);
   else
-    fprintf(f, "\\x%02x", c);
+    fprintf(f, "\\x{%02x}", c);
   }
 }
 #endif
@@ -382,7 +381,8 @@ for the current character, one for the following chara
     next_new_state->count  = (y); \
     next_new_state->data   = (z); \
     next_new_state++; \
-    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
+    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
+      (x), (y), (z), __LINE__)); \
     } \
   else return PCRE_ERROR_DFA_WSSIZE
 
@@ -424,6 +424,8 @@ BOOL utf = (md->poptions & PCRE_UTF8) != 0;
 BOOL utf = FALSE;
 #endif
 
+BOOL reset_could_continue = FALSE;
+
 rlevel++;
 offsetcount &= (-2);
 
@@ -569,9 +571,11 @@ for (;;)
   {
   int i, j;
   int clen, dlen;
-  unsigned int c, d;
+  pcre_uint32 c, d;
   int forced_fail = 0;
-  BOOL could_continue = FALSE;
+  BOOL partial_newline = FALSE;
+  BOOL could_continue = reset_could_continue;
+  reset_could_continue = FALSE;
 
   /* Make the new state list into the active state list and empty the
   new state list. */
@@ -607,11 +611,12 @@ for (;;)
 
   if (ptr < end_subject)
     {
-    clen = 1;        /* Number of bytes in the character */
+    clen = 1;        /* Number of data items in the character */
 #ifdef SUPPORT_UTF
-    if (utf) { GETCHARLEN(c, ptr, clen); } else
-#endif  /* SUPPORT_UTF */
+    GETCHARLENTEST(c, ptr, clen);
+#else
     c = *ptr;
+#endif  /* SUPPORT_UTF */
     }
   else
     {
@@ -630,7 +635,8 @@ for (;;)
     BOOL caseless = FALSE;
     const pcre_uchar *code;
     int state_offset = current_state->offset;
-    int count, codevalue, rrc;
+    int codevalue, rrc;
+    int count;
 
 #ifdef PCRE_DEBUG
     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -641,7 +647,8 @@ for (;;)
 
     /* A negative offset is a special case meaning "hold off going to this
     (negated) state until the number of characters in the data field have
-    been skipped". */
+    been skipped". If the could_continue flag was passed over from a previous
+    state, arrange for it to passed on. */
 
     if (state_offset < 0)
       {
@@ -650,6 +657,7 @@ for (;;)
         DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
         ADD_NEW_DATA(state_offset, current_state->count,
           current_state->data - 1);
+        if (could_continue) reset_could_continue = TRUE;
         continue;
         }
       else
@@ -689,10 +697,10 @@ for (;;)
     permitted.
 
     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
-    argument that is not a data character - but is always one byte long. We
-    have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
-    this case. To keep the other cases fast, convert these ones to new opcodes.
-    */
+    argument that is not a data character - but is always one byte long because
+    the values are small. We have to take special action to deal with  \P, \p,
+    \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
+    these ones to new opcodes. */
 
     if (coptable[codevalue] > 0)
       {
@@ -783,7 +791,7 @@ for (;;)
             offsets[0] = (int)(current_subject - start_subject);
             offsets[1] = (int)(ptr - start_subject);
             DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
-              offsets[1] - offsets[0], current_subject));
+              offsets[1] - offsets[0], (char *)current_subject));
             }
           if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
             {
@@ -888,7 +896,20 @@ for (;;)
       /*-----------------------------------------------------------------*/
       case OP_ANY:
       if (clen > 0 && !IS_NEWLINE(ptr))
-        { ADD_NEW(state_offset + 1, 0); }
+        {
+        if (ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else
+          {
+          ADD_NEW(state_offset + 1, 0);
+          }
+        }
       break;
 
       /*-----------------------------------------------------------------*/
@@ -916,6 +937,19 @@ for (;;)
                (ptr == end_subject - md->nllen)
             ))
           { ADD_ACTIVE(state_offset + 1, 0); }
+        else if (ptr + 1 >= md->end_subject &&
+                 (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+                 NLBLOCK->nltype == NLTYPE_FIXED &&
+                 NLBLOCK->nllen == 2 &&
+                 c == NLBLOCK->nl[0])
+          {
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            {
+            reset_could_continue = TRUE;
+            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+            }
+          else could_continue = partial_newline = TRUE;
+          }
         }
       break;
 
@@ -928,6 +962,19 @@ for (;;)
         else if (clen == 0 ||
             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
           { ADD_ACTIVE(state_offset + 1, 0); }
+        else if (ptr + 1 >= md->end_subject &&
+                 (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+                 NLBLOCK->nltype == NLTYPE_FIXED &&
+                 NLBLOCK->nllen == 2 &&
+                 c == NLBLOCK->nl[0])
+          {
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            {
+            reset_could_continue = TRUE;
+            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+            }
+          else could_continue = partial_newline = TRUE;
+          }
         }
       else if (IS_NEWLINE(ptr))
         { ADD_ACTIVE(state_offset + 1, 0); }
@@ -962,7 +1009,7 @@ for (;;)
           {
           const pcre_uchar *temp = ptr - 1;
           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
           if (utf) { BACKCHAR(temp); }
 #endif
           GETCHARTEST(d, temp);
@@ -1015,6 +1062,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[1])
           {
@@ -1063,6 +1111,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[2];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1090,7 +1153,15 @@ for (;;)
       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
       if (clen > 0)
         {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
             (c < 256 &&
               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1113,7 +1184,15 @@ for (;;)
       ADD_ACTIVE(state_offset + 2, 0);
       if (clen > 0)
         {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
             (c < 256 &&
               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1135,7 +1214,15 @@ for (;;)
       ADD_ACTIVE(state_offset + 2, 0);
       if (clen > 0)
         {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
             (c < 256 &&
               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1155,12 +1242,20 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
             (c < 256 &&
               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
           {
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -1176,7 +1271,15 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
             (c < 256 &&
               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1186,7 +1289,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -1209,6 +1312,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[2])
           {
@@ -1257,6 +1361,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[3];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1283,8 +1402,9 @@ for (;;)
       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
       count = current_state->count;  /* Already matched */
       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
@@ -1292,14 +1412,16 @@ for (;;)
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nd;
-          int ndlen = 1;
-          GETCHARLEN(nd, nptr, ndlen);
-          if (UCD_CATEGORY(nd) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += ndlen;
+          lgb = rgb;
+          nptr += dlen;
           }
         count++;
         ADD_NEW_DATA(-state_offset, count, ncount);
@@ -1318,20 +1440,22 @@ for (;;)
         int ncount = 0;
         switch (c)
           {
-          case 0x000b:
-          case 0x000c:
-          case 0x0085:
+          case CHAR_VT:
+          case CHAR_FF:
+          case CHAR_NEL:
+#ifndef EBCDIC
           case 0x2028:
           case 0x2029:
+#endif  /* Not EBCDIC */
           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
           goto ANYNL01;
 
-          case 0x000d:
-          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
+          case CHAR_CR:
+          if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */
 
           ANYNL01:
-          case 0x000a:
+          case CHAR_LF:
           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
             {
             active_count--;           /* Remove non-match possibility */
@@ -1358,13 +1482,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x000a:
-          case 0x000b:
-          case 0x000c:
-          case 0x000d:
-          case 0x0085:
-          case 0x2028:
-          case 0x2029:
+          VSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1397,25 +1515,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x09:      /* HT */
-          case 0x20:      /* SPACE */
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          HSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1456,6 +1556,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[2])
           {
@@ -1504,6 +1605,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[3];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1539,8 +1655,9 @@ for (;;)
       QS2:
 
       ADD_ACTIVE(state_offset + 2, 0);
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
@@ -1549,14 +1666,16 @@ for (;;)
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nd;
-          int ndlen = 1;
-          GETCHARLEN(nd, nptr, ndlen);
-          if (UCD_CATEGORY(nd) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += ndlen;
+          lgb = rgb;
+          nptr += dlen;
           }
         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
         }
@@ -1582,27 +1701,29 @@ for (;;)
         int ncount = 0;
         switch (c)
           {
-          case 0x000b:
-          case 0x000c:
-          case 0x0085:
+          case CHAR_VT:
+          case CHAR_FF:
+          case CHAR_NEL:
+#ifndef EBCDIC
           case 0x2028:
           case 0x2029:
+#endif  /* Not EBCDIC */
           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
           goto ANYNL02;
 
-          case 0x000d:
-          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
+          case CHAR_CR:
+          if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */
 
           ANYNL02:
-          case 0x000a:
+          case CHAR_LF:
           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
             {
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
+          ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
           break;
 
           default:
@@ -1630,13 +1751,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x000a:
-          case 0x000b:
-          case 0x000c:
-          case 0x000d:
-          case 0x0085:
-          case 0x2028:
-          case 0x2029:
+          VSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1652,7 +1767,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          ADD_NEW_DATA(-(state_offset + count), 0, 0);
+          ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
           }
         }
       break;
@@ -1676,25 +1791,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x09:      /* HT */
-          case 0x20:      /* SPACE */
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          HSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1711,7 +1808,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          ADD_NEW_DATA(-(state_offset + count), 0, 0);
+          ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
           }
         }
       break;
@@ -1728,6 +1825,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[1 + IMM2_SIZE + 1])
           {
@@ -1776,6 +1874,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1790,7 +1903,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -1806,8 +1919,9 @@ for (;;)
       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
       count = current_state->count;  /* Number already matched */
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
@@ -1815,16 +1929,20 @@ for (;;)
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nd;
-          int ndlen = 1;
-          GETCHARLEN(nd, nptr, ndlen);
-          if (UCD_CATEGORY(nd) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += ndlen;
+          lgb = rgb;
+          nptr += dlen;
           }
-        if (++count >= GET2(code, 1))
+        if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
+        if (++count >= (int)GET2(code, 1))
           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
         else
           { ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -1845,26 +1963,28 @@ for (;;)
         int ncount = 0;
         switch (c)
           {
-          case 0x000b:
-          case 0x000c:
-          case 0x0085:
+          case CHAR_VT:
+          case CHAR_FF:
+          case CHAR_NEL:
+#ifndef EBCDIC
           case 0x2028:
           case 0x2029:
+#endif  /* Not EBCDIC */
           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
           goto ANYNL03;
 
-          case 0x000d:
-          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
+          case CHAR_CR:
+          if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */
 
           ANYNL03:
-          case 0x000a:
+          case CHAR_LF:
           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
             {
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
           else
             { ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -1889,13 +2009,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x000a:
-          case 0x000b:
-          case 0x000c:
-          case 0x000d:
-          case 0x0085:
-          case 0x2028:
-          case 0x2029:
+          VSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1910,7 +2024,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
           else
             { ADD_NEW_DATA(-state_offset, count, 0); }
@@ -1931,25 +2045,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x09:      /* HT */
-          case 0x20:      /* SPACE */
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          HSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1965,7 +2061,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
           else
             { ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2025,18 +2121,24 @@ for (;;)
       to wait for them to pass before continuing. */
 
       case OP_EXTUNI:
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nclen = 1;
-          GETCHARLEN(c, nptr, nclen);
-          if (UCD_CATEGORY(c) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += nclen;
+          lgb = rgb;
+          nptr += dlen;
           }
+        if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
         }
       break;
@@ -2050,20 +2152,28 @@ for (;;)
       case OP_ANYNL:
       if (clen > 0) switch(c)
         {
-        case 0x000b:
-        case 0x000c:
-        case 0x0085:
+        case CHAR_VT:
+        case CHAR_FF:
+        case CHAR_NEL:
+#ifndef EBCDIC
         case 0x2028:
         case 0x2029:
+#endif  /* Not EBCDIC */
         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
 
-        case 0x000a:
+        case CHAR_LF:
         ADD_NEW(state_offset + 1, 0);
         break;
 
-        case 0x000d:
-        if (ptr + 1 < end_subject && ptr[1] == 0x0a)
+        case CHAR_CR:
+        if (ptr + 1 >= end_subject)
           {
+          ADD_NEW(state_offset + 1, 0);
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
+          }
+        else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
+          {
           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
           }
         else
@@ -2078,13 +2188,7 @@ for (;;)
       case OP_NOT_VSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x000a:
-        case 0x000b:
-        case 0x000c:
-        case 0x000d:
-        case 0x0085:
-        case 0x2028:
-        case 0x2029:
+        VSPACE_CASES:
         break;
 
         default:
@@ -2097,17 +2201,12 @@ for (;;)
       case OP_VSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x000a:
-        case 0x000b:
-        case 0x000c:
-        case 0x000d:
-        case 0x0085:
-        case 0x2028:
-        case 0x2029:
+        VSPACE_CASES:
         ADD_NEW(state_offset + 1, 0);
         break;
 
-        default: break;
+        default:
+        break;
         }
       break;
 
@@ -2115,25 +2214,7 @@ for (;;)
       case OP_NOT_HSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x09:      /* HT */
-        case 0x20:      /* SPACE */
-        case 0xa0:      /* NBSP */
-        case 0x1680:    /* OGHAM SPACE MARK */
-        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-        case 0x2000:    /* EN QUAD */
-        case 0x2001:    /* EM QUAD */
-        case 0x2002:    /* EN SPACE */
-        case 0x2003:    /* EM SPACE */
-        case 0x2004:    /* THREE-PER-EM SPACE */
-        case 0x2005:    /* FOUR-PER-EM SPACE */
-        case 0x2006:    /* SIX-PER-EM SPACE */
-        case 0x2007:    /* FIGURE SPACE */
-        case 0x2008:    /* PUNCTUATION SPACE */
-        case 0x2009:    /* THIN SPACE */
-        case 0x200A:    /* HAIR SPACE */
-        case 0x202f:    /* NARROW NO-BREAK SPACE */
-        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-        case 0x3000:    /* IDEOGRAPHIC SPACE */
+        HSPACE_CASES:
         break;
 
         default:
@@ -2146,47 +2227,42 @@ for (;;)
       case OP_HSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x09:      /* HT */
-        case 0x20:      /* SPACE */
-        case 0xa0:      /* NBSP */
-        case 0x1680:    /* OGHAM SPACE MARK */
-        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-        case 0x2000:    /* EN QUAD */
-        case 0x2001:    /* EM QUAD */
-        case 0x2002:    /* EN SPACE */
-        case 0x2003:    /* EM SPACE */
-        case 0x2004:    /* THREE-PER-EM SPACE */
-        case 0x2005:    /* FOUR-PER-EM SPACE */
-        case 0x2006:    /* SIX-PER-EM SPACE */
-        case 0x2007:    /* FIGURE SPACE */
-        case 0x2008:    /* PUNCTUATION SPACE */
-        case 0x2009:    /* THIN SPACE */
-        case 0x200A:    /* HAIR SPACE */
-        case 0x202f:    /* NARROW NO-BREAK SPACE */
-        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-        case 0x3000:    /* IDEOGRAPHIC SPACE */
+        HSPACE_CASES:
         ADD_NEW(state_offset + 1, 0);
         break;
+
+        default:
+        break;
         }
       break;
 
       /*-----------------------------------------------------------------*/
-      /* Match a negated single character casefully. This is only used for
-      one-byte characters, that is, we know that d < 256. The character we are
-      checking (c) can be multibyte. */
+      /* Match a negated single character casefully. */
 
       case OP_NOT:
       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
       break;
 
       /*-----------------------------------------------------------------*/
-      /* Match a negated single character caselessly. This is only used for
-      one-byte characters, that is, we know that d < 256. The character we are
-      checking (c) can be multibyte. */
+      /* Match a negated single character caselessly. */
 
       case OP_NOTI:
-      if (clen > 0 && c != d && c != fcc[d])
-        { ADD_NEW(state_offset + dlen + 1, 0); }
+      if (clen > 0)
+        {
+        unsigned int otherd;
+#ifdef SUPPORT_UTF
+        if (utf && d >= 128)
+          {
+#ifdef SUPPORT_UCP
+          otherd = UCD_OTHERCASE(d);
+#endif  /* SUPPORT_UCP */
+          }
+        else
+#endif  /* SUPPORT_UTF */
+        otherd = TABLE_GET(d, fcc, d);
+        if (c != d && c != otherd)
+          { ADD_NEW(state_offset + dlen + 1, 0); }
+        }
       break;
 
       /*-----------------------------------------------------------------*/
@@ -2210,7 +2286,7 @@ for (;;)
       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2257,7 +2333,7 @@ for (;;)
       ADD_ACTIVE(state_offset + dlen + 1, 0);
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2302,7 +2378,7 @@ for (;;)
       ADD_ACTIVE(state_offset + dlen + 1, 0);
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2339,7 +2415,7 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2355,7 +2431,7 @@ for (;;)
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
           {
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -2383,7 +2459,7 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2404,7 +2480,7 @@ for (;;)
             active_count--;             /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -2477,11 +2553,11 @@ for (;;)
           case OP_CRRANGE:
           case OP_CRMINRANGE:
           count = current_state->count;  /* Already matched */
-          if (count >= GET2(ecode, 1))
+          if (count >= (int)GET2(ecode, 1))
             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
           if (isinclass)
             {
-            int max = GET2(ecode, 1 + IMM2_SIZE);
+            int max = (int)GET2(ecode, 1 + IMM2_SIZE);
             if (++count >= max && max != 0)   /* Max 0 => no limit */
               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
             else
@@ -2557,10 +2633,12 @@ for (;;)
             cb.version          = 1;   /* Version 1 of the callout block */
             cb.callout_number   = code[LINK_SIZE+2];
             cb.offset_vector    = offsets;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
             cb.subject          = (PCRE_SPTR)start_subject;
-#else
+#elif defined COMPILE_PCRE16
             cb.subject          = (PCRE_SPTR16)start_subject;
+#elif defined COMPILE_PCRE32
+            cb.subject          = (PCRE_SPTR32)start_subject;
 #endif
             cb.subject_length   = (int)(end_subject - start_subject);
             cb.start_match      = (int)(current_subject - start_subject);
@@ -2691,10 +2769,13 @@ for (;;)
           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
             {
             int charcount = local_offsets[rc+1] - local_offsets[rc];
-#ifdef SUPPORT_UTF
-            const pcre_uchar *p = start_subject + local_offsets[rc];
-            const pcre_uchar *pp = start_subject + local_offsets[rc+1];
-            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+            if (utf)
+              {
+              const pcre_uchar *p = start_subject + local_offsets[rc];
+              const pcre_uchar *pp = start_subject + local_offsets[rc+1];
+              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              }
 #endif
             if (charcount > 0)
               {
@@ -2792,8 +2873,8 @@ for (;;)
             const pcre_uchar *p = ptr;
             const pcre_uchar *pp = local_ptr;
             charcount = (int)(pp - p);
-#ifdef SUPPORT_UTF
-            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+            if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 #endif
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
             }
@@ -2874,10 +2955,13 @@ for (;;)
             }
           else
             {
-#ifdef SUPPORT_UTF
-            const pcre_uchar *p = start_subject + local_offsets[0];
-            const pcre_uchar *pp = start_subject + local_offsets[1];
-            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+            if (utf)
+              {
+              const pcre_uchar *p = start_subject + local_offsets[0];
+              const pcre_uchar *pp = start_subject + local_offsets[1];
+              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              }
 #endif
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
             if (repeat_state_offset >= 0)
@@ -2900,10 +2984,12 @@ for (;;)
         cb.version          = 1;   /* Version 1 of the callout block */
         cb.callout_number   = code[1];
         cb.offset_vector    = offsets;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
         cb.subject          = (PCRE_SPTR)start_subject;
-#else
+#elif defined COMPILE_PCRE16
         cb.subject          = (PCRE_SPTR16)start_subject;
+#elif defined COMPILE_PCRE32
+        cb.subject          = (PCRE_SPTR32)start_subject;
 #endif
         cb.subject_length   = (int)(end_subject - start_subject);
         cb.start_match      = (int)(current_subject - start_subject);
@@ -2946,7 +3032,7 @@ for (;;)
   if (new_count <= 0)
     {
     if (rlevel == 1 &&                               /* Top level, and */
-        could_continue &&                            /* Some could go on */
+        could_continue &&                            /* Some could go on, and */
         forced_fail != workspace[1] &&               /* Not all forced fail & */
         (                                            /* either... */
         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
@@ -2954,17 +3040,14 @@ for (;;)
         ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
          match_count < 0)                            /* no matches */
         ) &&                                         /* And... */
-        ptr >= end_subject &&                  /* Reached end of subject */
-        ptr > md->start_used_ptr)              /* Inspected non-empty string */
-      {
-      if (offsetcount >= 2)
-        {
-        offsets[0] = (int)(md->start_used_ptr - start_subject);
-        offsets[1] = (int)(end_subject - start_subject);
-        }
+        (
+        partial_newline ||                           /* Either partial NL */
+          (                                          /* or ... */
+          ptr >= end_subject &&                /* End of subject and */
+          ptr > md->start_used_ptr)            /* Inspected non-empty string */
+          )
+        )
       match_count = PCRE_ERROR_PARTIAL;
-      }
-
     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
       rlevel*2-2, SP));
@@ -3014,16 +3097,21 @@ Returns:          > 0 => number of match offset pairs 
                  < -1 => some kind of unexpected problem
 */
 
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
   const char *subject, int length, int start_offset, int options, int *offsets,
   int offsetcount, int *workspace, int wscount)
-#else
+#elif defined COMPILE_PCRE16
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
   int offsetcount, int *workspace, int wscount)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+  PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
+  int offsetcount, int *workspace, int wscount)
 #endif
 {
 REAL_PCRE *re = (REAL_PCRE *)argument_re;
@@ -3050,13 +3138,31 @@ if (re == NULL || subject == NULL || workspace == NULL
    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
+if (length < 0) return PCRE_ERROR_BADLENGTH;
 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 
-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
 
+if (re->magic_number != MAGIC_NUMBER)
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+  {
+  if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
+    workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+      return PCRE_ERROR_DFA_BADRESTART;
+  }
+
+/* Set up study, callout, and table data */
+
 md->tables = re->tables;
 md->callout_data = NULL;
 
@@ -3074,16 +3180,6 @@ if (extra_data != NULL)
     md->tables = extra_data->tables;
   }
 
-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
-  return re->magic_number == REVERSED_MAGIC_NUMBER?
-    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
 /* Set some local values */
 
 current_subject = (const pcre_uchar *)subject + start_offset;
@@ -3091,7 +3187,7 @@ end_subject = (const pcre_uchar *)subject + length;
 req_char_ptr = current_subject - 1;
 
 #ifdef SUPPORT_UTF
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
 utf = (re->options & PCRE_UTF8) != 0;
 #else
 utf = FALSE;
@@ -3177,12 +3273,21 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
       offsets[0] = erroroffset;
       offsets[1] = errorcode;
       }
-    return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
+#if defined COMPILE_PCRE8
+    return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#elif defined COMPILE_PCRE16
+    return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
+      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#elif defined COMPILE_PCRE32
+    return PCRE_ERROR_BADUTF32;
+#endif
     }
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
   if (start_offset > 0 && start_offset < length &&
         NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
+#endif
   }
 #endif
 
@@ -3292,12 +3397,15 @@ for (;;)
       if (has_first_char)
         {
         if (first_char != first_char2)
+          {
+          pcre_uchar csc;
           while (current_subject < end_subject &&
-              *current_subject != first_char && *current_subject != first_char2)
+                 (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
             current_subject++;
+          }
         else
           while (current_subject < end_subject &&
-                 *current_subject != first_char)
+                 RAWUCHARTEST(current_subject) != first_char)
             current_subject++;
         }
 
@@ -3327,10 +3435,10 @@ for (;;)
           ANYCRLF, and we are now at a LF, advance the match position by one
           more character. */
 
-          if (current_subject[-1] == CHAR_CR &&
+          if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                current_subject < end_subject &&
-               *current_subject == CHAR_NL)
+               RAWUCHARTEST(current_subject) == CHAR_NL)
             current_subject++;
           }
         }
@@ -3341,7 +3449,7 @@ for (;;)
         {
         while (current_subject < end_subject)
           {
-          register unsigned int c = *current_subject;
+          register pcre_uint32 c = RAWUCHARTEST(current_subject);
 #ifndef COMPILE_PCRE8
           if (c > 255) c = 255;
 #endif
@@ -3407,7 +3515,7 @@ for (;;)
             {
             while (p < end_subject)
               {
-              register int pp = *p++;
+              register pcre_uint32 pp = RAWUCHARINCTEST(p);
               if (pp == req_char || pp == req_char2) { p--; break; }
               }
             }
@@ -3415,7 +3523,7 @@ for (;;)
             {
             while (p < end_subject)
               {
-              if (*p++ == req_char) { p--; break; }
+              if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
               }
             }
 
@@ -3453,7 +3561,17 @@ for (;;)
   /* Anything other than "no match" means we are done, always; otherwise, carry
   on only if not anchored. */
 
-  if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
+  if (rc != PCRE_ERROR_NOMATCH || anchored)
+    {
+    if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
+      {
+      offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
+      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
+      if (offsetcount > 2)
+        offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
+      }
+    return rc;
+    }
 
   /* Advance to the next subject character unless we are at the end of a line
   and firstline is set. */
@@ -3473,9 +3591,9 @@ for (;;)
   not contain any explicit matches for \r or \n, and the newline option is CRLF
   or ANY or ANYCRLF, advance the match position by one more character. */
 
-  if (current_subject[-1] == CHAR_CR &&
+  if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
       current_subject < end_subject &&
-      *current_subject == CHAR_NL &&
+      RAWUCHARTEST(current_subject) == CHAR_NL &&
       (re->flags & PCRE_HASCRORLF) == 0 &&
         (md->nltype == NLTYPE_ANY ||
          md->nltype == NLTYPE_ANYCRLF ||