--- embedaddon/pcre/pcre_dfa_exec.c	2012/10/09 09:19:17	1.1.1.3
+++ embedaddon/pcre/pcre_dfa_exec.c	2013/07/22 08:25:55	1.1.1.4
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the
 below for why this module is different).
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -302,13 +302,13 @@ Returns:       nothing
 static void
 pchars(const pcre_uchar *p, int length, FILE *f)
 {
-int c;
+pcre_uint32 c;
 while (length-- > 0)
   {
   if (isprint(c = *(p++)))
     fprintf(f, "%c", c);
   else
-    fprintf(f, "\\x%02x", c);
+    fprintf(f, "\\x{%02x}", c);
   }
 }
 #endif
@@ -571,7 +571,7 @@ for (;;)
   {
   int i, j;
   int clen, dlen;
-  unsigned int c, d;
+  pcre_uint32 c, d;
   int forced_fail = 0;
   BOOL partial_newline = FALSE;
   BOOL could_continue = reset_could_continue;
@@ -613,9 +613,10 @@ for (;;)
     {
     clen = 1;        /* Number of data items in the character */
 #ifdef SUPPORT_UTF
-    if (utf) { GETCHARLEN(c, ptr, clen); } else
-#endif  /* SUPPORT_UTF */
+    GETCHARLENTEST(c, ptr, clen);
+#else
     c = *ptr;
+#endif  /* SUPPORT_UTF */
     }
   else
     {
@@ -634,7 +635,8 @@ for (;;)
     BOOL caseless = FALSE;
     const pcre_uchar *code;
     int state_offset = current_state->offset;
-    int count, codevalue, rrc;
+    int codevalue, rrc;
+    int count;
 
 #ifdef PCRE_DEBUG
     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -1007,7 +1009,7 @@ for (;;)
           {
           const pcre_uchar *temp = ptr - 1;
           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
           if (utf) { BACKCHAR(temp); }
 #endif
           GETCHARTEST(d, temp);
@@ -1060,6 +1062,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[1])
           {
@@ -1108,6 +1111,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[2];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1237,7 +1255,7 @@ for (;;)
               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
           {
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -1271,7 +1289,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -1294,6 +1312,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[2])
           {
@@ -1342,6 +1361,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[3];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1368,8 +1402,9 @@ for (;;)
       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
       count = current_state->count;  /* Already matched */
       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
@@ -1377,14 +1412,16 @@ for (;;)
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nd;
-          int ndlen = 1;
-          GETCHARLEN(nd, nptr, ndlen);
-          if (UCD_CATEGORY(nd) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += ndlen;
+          lgb = rgb;
+          nptr += dlen;
           }
         count++;
         ADD_NEW_DATA(-state_offset, count, ncount);
@@ -1403,20 +1440,22 @@ for (;;)
         int ncount = 0;
         switch (c)
           {
-          case 0x000b:
-          case 0x000c:
-          case 0x0085:
+          case CHAR_VT:
+          case CHAR_FF:
+          case CHAR_NEL:
+#ifndef EBCDIC
           case 0x2028:
           case 0x2029:
+#endif  /* Not EBCDIC */
           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
           goto ANYNL01;
 
-          case 0x000d:
-          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
+          case CHAR_CR:
+          if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */
 
           ANYNL01:
-          case 0x000a:
+          case CHAR_LF:
           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
             {
             active_count--;           /* Remove non-match possibility */
@@ -1443,13 +1482,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x000a:
-          case 0x000b:
-          case 0x000c:
-          case 0x000d:
-          case 0x0085:
-          case 0x2028:
-          case 0x2029:
+          VSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1482,25 +1515,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x09:      /* HT */
-          case 0x20:      /* SPACE */
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          HSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1541,6 +1556,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[2])
           {
@@ -1589,6 +1605,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[3];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1624,8 +1655,9 @@ for (;;)
       QS2:
 
       ADD_ACTIVE(state_offset + 2, 0);
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
@@ -1634,14 +1666,16 @@ for (;;)
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nd;
-          int ndlen = 1;
-          GETCHARLEN(nd, nptr, ndlen);
-          if (UCD_CATEGORY(nd) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += ndlen;
+          lgb = rgb;
+          nptr += dlen;
           }
         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
         }
@@ -1667,27 +1701,29 @@ for (;;)
         int ncount = 0;
         switch (c)
           {
-          case 0x000b:
-          case 0x000c:
-          case 0x0085:
+          case CHAR_VT:
+          case CHAR_FF:
+          case CHAR_NEL:
+#ifndef EBCDIC
           case 0x2028:
           case 0x2029:
+#endif  /* Not EBCDIC */
           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
           goto ANYNL02;
 
-          case 0x000d:
-          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
+          case CHAR_CR:
+          if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */
 
           ANYNL02:
-          case 0x000a:
+          case CHAR_LF:
           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
             {
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
+          ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
           break;
 
           default:
@@ -1715,13 +1751,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x000a:
-          case 0x000b:
-          case 0x000c:
-          case 0x000d:
-          case 0x0085:
-          case 0x2028:
-          case 0x2029:
+          VSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1737,7 +1767,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          ADD_NEW_DATA(-(state_offset + count), 0, 0);
+          ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
           }
         }
       break;
@@ -1761,25 +1791,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x09:      /* HT */
-          case 0x20:      /* SPACE */
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          HSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1796,7 +1808,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          ADD_NEW_DATA(-(state_offset + count), 0, 0);
+          ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
           }
         }
       break;
@@ -1813,6 +1825,7 @@ for (;;)
       if (clen > 0)
         {
         BOOL OK;
+        const pcre_uint32 *cp;
         const ucd_record * prop = GET_UCD(c);
         switch(code[1 + IMM2_SIZE + 1])
           {
@@ -1861,6 +1874,21 @@ for (;;)
                c == CHAR_UNDERSCORE;
           break;
 
+          case PT_CLIST:
+          cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
+          for (;;)
+            {
+            if (c < *cp) { OK = FALSE; break; }
+            if (c == *cp++) { OK = TRUE; break; }
+            }
+          break;
+
+          case PT_UCNC:
+          OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+               c >= 0xe000;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1875,7 +1903,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -1891,8 +1919,9 @@ for (;;)
       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
       count = current_state->count;  /* Number already matched */
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
@@ -1900,18 +1929,20 @@ for (;;)
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nd;
-          int ndlen = 1;
-          GETCHARLEN(nd, nptr, ndlen);
-          if (UCD_CATEGORY(nd) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += ndlen;
+          lgb = rgb;
+          nptr += dlen;
           }
         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
             reset_could_continue = TRUE;
-        if (++count >= GET2(code, 1))
+        if (++count >= (int)GET2(code, 1))
           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
         else
           { ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -1932,26 +1963,28 @@ for (;;)
         int ncount = 0;
         switch (c)
           {
-          case 0x000b:
-          case 0x000c:
-          case 0x0085:
+          case CHAR_VT:
+          case CHAR_FF:
+          case CHAR_NEL:
+#ifndef EBCDIC
           case 0x2028:
           case 0x2029:
+#endif  /* Not EBCDIC */
           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
           goto ANYNL03;
 
-          case 0x000d:
-          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
+          case CHAR_CR:
+          if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */
 
           ANYNL03:
-          case 0x000a:
+          case CHAR_LF:
           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
             {
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
           else
             { ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -1976,13 +2009,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x000a:
-          case 0x000b:
-          case 0x000c:
-          case 0x000d:
-          case 0x0085:
-          case 0x2028:
-          case 0x2029:
+          VSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -1997,7 +2024,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
           else
             { ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2018,25 +2045,7 @@ for (;;)
         BOOL OK;
         switch (c)
           {
-          case 0x09:      /* HT */
-          case 0x20:      /* SPACE */
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
+          HSPACE_CASES:
           OK = TRUE;
           break;
 
@@ -2052,7 +2061,7 @@ for (;;)
             active_count--;           /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
           else
             { ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2112,17 +2121,21 @@ for (;;)
       to wait for them to pass before continuing. */
 
       case OP_EXTUNI:
-      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
+      if (clen > 0)
         {
+        int lgb, rgb;
         const pcre_uchar *nptr = ptr + clen;
         int ncount = 0;
+        lgb = UCD_GRAPHBREAK(c);
         while (nptr < end_subject)
           {
-          int nclen = 1;
-          GETCHARLEN(c, nptr, nclen);
-          if (UCD_CATEGORY(c) != ucp_M) break;
+          dlen = 1;
+          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
+          rgb = UCD_GRAPHBREAK(d);
+          if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
           ncount++;
-          nptr += nclen;
+          lgb = rgb;
+          nptr += dlen;
           }
         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
             reset_could_continue = TRUE;
@@ -2139,25 +2152,27 @@ for (;;)
       case OP_ANYNL:
       if (clen > 0) switch(c)
         {
-        case 0x000b:
-        case 0x000c:
-        case 0x0085:
+        case CHAR_VT:
+        case CHAR_FF:
+        case CHAR_NEL:
+#ifndef EBCDIC
         case 0x2028:
         case 0x2029:
+#endif  /* Not EBCDIC */
         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
 
-        case 0x000a:
+        case CHAR_LF:
         ADD_NEW(state_offset + 1, 0);
         break;
 
-        case 0x000d:
+        case CHAR_CR:
         if (ptr + 1 >= end_subject)
           {
           ADD_NEW(state_offset + 1, 0);
           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
             reset_could_continue = TRUE;
           }
-        else if (ptr[1] == 0x0a)
+        else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
           {
           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
           }
@@ -2173,13 +2188,7 @@ for (;;)
       case OP_NOT_VSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x000a:
-        case 0x000b:
-        case 0x000c:
-        case 0x000d:
-        case 0x0085:
-        case 0x2028:
-        case 0x2029:
+        VSPACE_CASES:
         break;
 
         default:
@@ -2192,17 +2201,12 @@ for (;;)
       case OP_VSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x000a:
-        case 0x000b:
-        case 0x000c:
-        case 0x000d:
-        case 0x0085:
-        case 0x2028:
-        case 0x2029:
+        VSPACE_CASES:
         ADD_NEW(state_offset + 1, 0);
         break;
 
-        default: break;
+        default:
+        break;
         }
       break;
 
@@ -2210,25 +2214,7 @@ for (;;)
       case OP_NOT_HSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x09:      /* HT */
-        case 0x20:      /* SPACE */
-        case 0xa0:      /* NBSP */
-        case 0x1680:    /* OGHAM SPACE MARK */
-        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-        case 0x2000:    /* EN QUAD */
-        case 0x2001:    /* EM QUAD */
-        case 0x2002:    /* EN SPACE */
-        case 0x2003:    /* EM SPACE */
-        case 0x2004:    /* THREE-PER-EM SPACE */
-        case 0x2005:    /* FOUR-PER-EM SPACE */
-        case 0x2006:    /* SIX-PER-EM SPACE */
-        case 0x2007:    /* FIGURE SPACE */
-        case 0x2008:    /* PUNCTUATION SPACE */
-        case 0x2009:    /* THIN SPACE */
-        case 0x200A:    /* HAIR SPACE */
-        case 0x202f:    /* NARROW NO-BREAK SPACE */
-        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-        case 0x3000:    /* IDEOGRAPHIC SPACE */
+        HSPACE_CASES:
         break;
 
         default:
@@ -2241,27 +2227,12 @@ for (;;)
       case OP_HSPACE:
       if (clen > 0) switch(c)
         {
-        case 0x09:      /* HT */
-        case 0x20:      /* SPACE */
-        case 0xa0:      /* NBSP */
-        case 0x1680:    /* OGHAM SPACE MARK */
-        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-        case 0x2000:    /* EN QUAD */
-        case 0x2001:    /* EM QUAD */
-        case 0x2002:    /* EN SPACE */
-        case 0x2003:    /* EM SPACE */
-        case 0x2004:    /* THREE-PER-EM SPACE */
-        case 0x2005:    /* FOUR-PER-EM SPACE */
-        case 0x2006:    /* SIX-PER-EM SPACE */
-        case 0x2007:    /* FIGURE SPACE */
-        case 0x2008:    /* PUNCTUATION SPACE */
-        case 0x2009:    /* THIN SPACE */
-        case 0x200A:    /* HAIR SPACE */
-        case 0x202f:    /* NARROW NO-BREAK SPACE */
-        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-        case 0x3000:    /* IDEOGRAPHIC SPACE */
+        HSPACE_CASES:
         ADD_NEW(state_offset + 1, 0);
         break;
+
+        default:
+        break;
         }
       break;
 
@@ -2315,7 +2286,7 @@ for (;;)
       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2362,7 +2333,7 @@ for (;;)
       ADD_ACTIVE(state_offset + dlen + 1, 0);
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2407,7 +2378,7 @@ for (;;)
       ADD_ACTIVE(state_offset + dlen + 1, 0);
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2444,7 +2415,7 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2460,7 +2431,7 @@ for (;;)
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
           {
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -2488,7 +2459,7 @@ for (;;)
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        unsigned int otherd = NOTACHAR;
+        pcre_uint32 otherd = NOTACHAR;
         if (caseless)
           {
 #ifdef SUPPORT_UTF
@@ -2509,7 +2480,7 @@ for (;;)
             active_count--;             /* Remove non-match possibility */
             next_active_state--;
             }
-          if (++count >= GET2(code, 1))
+          if (++count >= (int)GET2(code, 1))
             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
           else
             { ADD_NEW(state_offset, count); }
@@ -2582,11 +2553,11 @@ for (;;)
           case OP_CRRANGE:
           case OP_CRMINRANGE:
           count = current_state->count;  /* Already matched */
-          if (count >= GET2(ecode, 1))
+          if (count >= (int)GET2(ecode, 1))
             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
           if (isinclass)
             {
-            int max = GET2(ecode, 1 + IMM2_SIZE);
+            int max = (int)GET2(ecode, 1 + IMM2_SIZE);
             if (++count >= max && max != 0)   /* Max 0 => no limit */
               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
             else
@@ -2662,10 +2633,12 @@ for (;;)
             cb.version          = 1;   /* Version 1 of the callout block */
             cb.callout_number   = code[LINK_SIZE+2];
             cb.offset_vector    = offsets;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
             cb.subject          = (PCRE_SPTR)start_subject;
-#else
+#elif defined COMPILE_PCRE16
             cb.subject          = (PCRE_SPTR16)start_subject;
+#elif defined COMPILE_PCRE32
+            cb.subject          = (PCRE_SPTR32)start_subject;
 #endif
             cb.subject_length   = (int)(end_subject - start_subject);
             cb.start_match      = (int)(current_subject - start_subject);
@@ -2796,7 +2769,7 @@ for (;;)
           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
             {
             int charcount = local_offsets[rc+1] - local_offsets[rc];
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
             if (utf)
               {
               const pcre_uchar *p = start_subject + local_offsets[rc];
@@ -2900,7 +2873,7 @@ for (;;)
             const pcre_uchar *p = ptr;
             const pcre_uchar *pp = local_ptr;
             charcount = (int)(pp - p);
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
             if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 #endif
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
@@ -2982,7 +2955,7 @@ for (;;)
             }
           else
             {
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
             if (utf)
               {
               const pcre_uchar *p = start_subject + local_offsets[0];
@@ -3011,10 +2984,12 @@ for (;;)
         cb.version          = 1;   /* Version 1 of the callout block */
         cb.callout_number   = code[1];
         cb.offset_vector    = offsets;
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
         cb.subject          = (PCRE_SPTR)start_subject;
-#else
+#elif defined COMPILE_PCRE16
         cb.subject          = (PCRE_SPTR16)start_subject;
+#elif defined COMPILE_PCRE32
+        cb.subject          = (PCRE_SPTR32)start_subject;
 #endif
         cb.subject_length   = (int)(end_subject - start_subject);
         cb.start_match      = (int)(current_subject - start_subject);
@@ -3072,15 +3047,7 @@ for (;;)
           ptr > md->start_used_ptr)            /* Inspected non-empty string */
           )
         )
-      {
-      if (offsetcount >= 2)
-        {
-        offsets[0] = (int)(md->start_used_ptr - start_subject);
-        offsets[1] = (int)(end_subject - start_subject);
-        }
       match_count = PCRE_ERROR_PARTIAL;
-      }
-
     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
       rlevel*2-2, SP));
@@ -3130,16 +3097,21 @@ Returns:          > 0 => number of match offset pairs 
                  < -1 => some kind of unexpected problem
 */
 
-#ifdef COMPILE_PCRE8
+#if defined COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
   const char *subject, int length, int start_offset, int options, int *offsets,
   int offsetcount, int *workspace, int wscount)
-#else
+#elif defined COMPILE_PCRE16
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
   int offsetcount, int *workspace, int wscount)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+  PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
+  int offsetcount, int *workspace, int wscount)
 #endif
 {
 REAL_PCRE *re = (REAL_PCRE *)argument_re;
@@ -3166,6 +3138,7 @@ if (re == NULL || subject == NULL || workspace == NULL
    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
+if (length < 0) return PCRE_ERROR_BADLENGTH;
 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 
 /* Check that the first field in the block is the magic number. If it is not,
@@ -3214,7 +3187,7 @@ end_subject = (const pcre_uchar *)subject + length;
 req_char_ptr = current_subject - 1;
 
 #ifdef SUPPORT_UTF
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
 utf = (re->options & PCRE_UTF8) != 0;
 #else
 utf = FALSE;
@@ -3300,12 +3273,21 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
       offsets[0] = erroroffset;
       offsets[1] = errorcode;
       }
-    return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
+#if defined COMPILE_PCRE8
+    return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#elif defined COMPILE_PCRE16
+    return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
+      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#elif defined COMPILE_PCRE32
+    return PCRE_ERROR_BADUTF32;
+#endif
     }
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
   if (start_offset > 0 && start_offset < length &&
         NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
+#endif
   }
 #endif
 
@@ -3415,12 +3397,15 @@ for (;;)
       if (has_first_char)
         {
         if (first_char != first_char2)
+          {
+          pcre_uchar csc;
           while (current_subject < end_subject &&
-              *current_subject != first_char && *current_subject != first_char2)
+                 (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
             current_subject++;
+          }
         else
           while (current_subject < end_subject &&
-                 *current_subject != first_char)
+                 RAWUCHARTEST(current_subject) != first_char)
             current_subject++;
         }
 
@@ -3450,10 +3435,10 @@ for (;;)
           ANYCRLF, and we are now at a LF, advance the match position by one
           more character. */
 
-          if (current_subject[-1] == CHAR_CR &&
+          if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                current_subject < end_subject &&
-               *current_subject == CHAR_NL)
+               RAWUCHARTEST(current_subject) == CHAR_NL)
             current_subject++;
           }
         }
@@ -3464,7 +3449,7 @@ for (;;)
         {
         while (current_subject < end_subject)
           {
-          register unsigned int c = *current_subject;
+          register pcre_uint32 c = RAWUCHARTEST(current_subject);
 #ifndef COMPILE_PCRE8
           if (c > 255) c = 255;
 #endif
@@ -3530,7 +3515,7 @@ for (;;)
             {
             while (p < end_subject)
               {
-              register int pp = *p++;
+              register pcre_uint32 pp = RAWUCHARINCTEST(p);
               if (pp == req_char || pp == req_char2) { p--; break; }
               }
             }
@@ -3538,7 +3523,7 @@ for (;;)
             {
             while (p < end_subject)
               {
-              if (*p++ == req_char) { p--; break; }
+              if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
               }
             }
 
@@ -3576,7 +3561,17 @@ for (;;)
   /* Anything other than "no match" means we are done, always; otherwise, carry
   on only if not anchored. */
 
-  if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
+  if (rc != PCRE_ERROR_NOMATCH || anchored)
+    {
+    if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
+      {
+      offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
+      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
+      if (offsetcount > 2)
+        offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
+      }
+    return rc;
+    }
 
   /* Advance to the next subject character unless we are at the end of a line
   and firstline is set. */
@@ -3596,9 +3591,9 @@ for (;;)
   not contain any explicit matches for \r or \n, and the newline option is CRLF
   or ANY or ANYCRLF, advance the match position by one more character. */
 
-  if (current_subject[-1] == CHAR_CR &&
+  if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
       current_subject < end_subject &&
-      *current_subject == CHAR_NL &&
+      RAWUCHARTEST(current_subject) == CHAR_NL &&
       (re->flags & PCRE_HASCRORLF) == 0 &&
         (md->nltype == NLTYPE_ANY ||
          md->nltype == NLTYPE_ANYCRLF ||