Annotation of embedaddon/pcre/pcre_dfa_exec.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language (but see
                      7: below for why this module is different).
                      8: 
                      9:                        Written by Philip Hazel
                     10:            Copyright (c) 1997-2011 University of Cambridge
                     11: 
                     12: -----------------------------------------------------------------------------
                     13: Redistribution and use in source and binary forms, with or without
                     14: modification, are permitted provided that the following conditions are met:
                     15: 
                     16:     * Redistributions of source code must retain the above copyright notice,
                     17:       this list of conditions and the following disclaimer.
                     18: 
                     19:     * Redistributions in binary form must reproduce the above copyright
                     20:       notice, this list of conditions and the following disclaimer in the
                     21:       documentation and/or other materials provided with the distribution.
                     22: 
                     23:     * Neither the name of the University of Cambridge nor the names of its
                     24:       contributors may be used to endorse or promote products derived from
                     25:       this software without specific prior written permission.
                     26: 
                     27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37: POSSIBILITY OF SUCH DAMAGE.
                     38: -----------------------------------------------------------------------------
                     39: */
                     40: 
                     41: 
                     42: /* This module contains the external function pcre_dfa_exec(), which is an
                     43: alternative matching function that uses a sort of DFA algorithm (not a true
                     44: FSM). This is NOT Perl-compatible, but it has advantages in certain
                     45: applications. */
                     46: 
                     47: 
                     48: /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
                     49: the performance of his patterns greatly. I could not use it as it stood, as it
                     50: was not thread safe, and made assumptions about pattern sizes. Also, it caused
                     51: test 7 to loop, and test 9 to crash with a segfault.
                     52: 
                     53: The issue is the check for duplicate states, which is done by a simple linear
                     54: search up the state list. (Grep for "duplicate" below to find the code.) For
                     55: many patterns, there will never be many states active at one time, so a simple
                     56: linear search is fine. In patterns that have many active states, it might be a
                     57: bottleneck. The suggested code used an indexing scheme to remember which states
                     58: had previously been used for each character, and avoided the linear search when
                     59: it knew there was no chance of a duplicate. This was implemented when adding
                     60: states to the state lists.
                     61: 
                     62: I wrote some thread-safe, not-limited code to try something similar at the time
                     63: of checking for duplicates (instead of when adding states), using index vectors
                     64: on the stack. It did give a 13% improvement with one specially constructed
                     65: pattern for certain subject strings, but on other strings and on many of the
                     66: simpler patterns in the test suite it did worse. The major problem, I think,
                     67: was the extra time to initialize the index. This had to be done for each call
                     68: of internal_dfa_exec(). (The supplied patch used a static vector, initialized
                     69: only once - I suspect this was the cause of the problems with the tests.)
                     70: 
                     71: Overall, I concluded that the gains in some cases did not outweigh the losses
                     72: in others, so I abandoned this code. */
                     73: 
                     74: 
                     75: 
                     76: #ifdef HAVE_CONFIG_H
                     77: #include "config.h"
                     78: #endif
                     79: 
                     80: #define NLBLOCK md             /* Block containing newline information */
                     81: #define PSSTART start_subject  /* Field containing processed string start */
                     82: #define PSEND   end_subject    /* Field containing processed string end */
                     83: 
                     84: #include "pcre_internal.h"
                     85: 
                     86: 
                     87: /* For use to indent debugging output */
                     88: 
                     89: #define SP "                   "  /* the DPRINTF calls in the ADD_* macros print its first rlevel*2-2 characters via "%.*s" */
                     90: 
                     91: 
                     92: /*************************************************
                     93: *      Code parameters and static tables         *
                     94: *************************************************/
                     95: 
                     96: /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
                     97: into others, under special conditions. A gap of 20 between the blocks should be
                     98: enough. The resulting opcodes don't have to be less than 256 because they are
                     99: never stored, so we push them well clear of the normal opcodes. */
                    100: 
                    101: #define OP_PROP_EXTRA       300   /* NOTE(review): presumably used when the operand is \P or \p -- confirm at use site */
                    102: #define OP_EXTUNI_EXTRA     320   /* NOTE(review): presumably used when the operand is \X -- confirm at use site */
                    103: #define OP_ANYNL_EXTRA      340   /* NOTE(review): presumably used when the operand is \R -- confirm at use site */
                    104: #define OP_HSPACE_EXTRA     360   /* NOTE(review): presumably used when the operand is \H or \h -- confirm at use site */
                    105: #define OP_VSPACE_EXTRA     380   /* NOTE(review): presumably used when the operand is \V or \v -- confirm at use site */
                    106: 
                    107: 
                    108: /* This table identifies those opcodes that are followed immediately by a
                    109: character that is to be tested in some way. This makes it possible to
                    110: centralize the loading of these characters. In the case of Type * etc, the
                    111: "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
                    112: small value. Non-zero values in the table are the offsets from the opcode where
                    113: the character is to be found. ***NOTE*** If the start of this table is
                    114: modified, the three tables that follow must also be modified. */
                    115: 
                    116: static const uschar coptable[] = {  /* one entry per opcode: 0 = no character operand to load; 1 or 3 = its offset from the opcode (3 leaves two bytes in between, presumably a repeat count -- confirm) */
                    117:   0,                             /* End                                    */
                    118:   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
                    119:   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
                    120:   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
                    121:   0, 0,                          /* \P, \p                                 */
                    122:   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
                    123:   0,                             /* \X                                     */
                    124:   0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
                    125:   1,                             /* Char                                   */
                    126:   1,                             /* Chari                                  */
                    127:   1,                             /* not                                    */
                    128:   1,                             /* noti                                   */
                    129:   /* Positive single-char repeats                                          */
                    130:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    131:   3, 3, 3,                       /* upto, minupto, exact                   */
                    132:   1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
                    133:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
                    134:   3, 3, 3,                       /* upto I, minupto I, exact I             */
                    135:   1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */
                    136:   /* Negative single-char repeats - only for chars < 256                   */
                    137:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
                    138:   3, 3, 3,                       /* NOT upto, minupto, exact               */
                    139:   1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
                    140:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
                    141:   3, 3, 3,                       /* NOT upto I, minupto I, exact I         */
                    142:   1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */
                    143:   /* Positive type repeats                                                 */
                    144:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
                    145:   3, 3, 3,                       /* Type upto, minupto, exact              */
                    146:   1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
                    147:   /* Character class & ref repeats                                         */
                    148:   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
                    149:   0, 0,                          /* CRRANGE, CRMINRANGE                    */
                    150:   0,                             /* CLASS                                  */
                    151:   0,                             /* NCLASS                                 */
                    152:   0,                             /* XCLASS - variable length               */
                    153:   0,                             /* REF                                    */
                    154:   0,                             /* REFI                                   */
                    155:   0,                             /* RECURSE                                */
                    156:   0,                             /* CALLOUT                                */
                    157:   0,                             /* Alt                                    */
                    158:   0,                             /* Ket                                    */
                    159:   0,                             /* KetRmax                                */
                    160:   0,                             /* KetRmin                                */
                    161:   0,                             /* KetRpos                                */
                    162:   0,                             /* Reverse                                */
                    163:   0,                             /* Assert                                 */
                    164:   0,                             /* Assert not                             */
                    165:   0,                             /* Assert behind                          */
                    166:   0,                             /* Assert behind not                      */
                    167:   0, 0,                          /* ONCE, ONCE_NC                          */
                    168:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    169:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
                    170:   0, 0,                          /* CREF, NCREF                            */
                    171:   0, 0,                          /* RREF, NRREF                            */
                    172:   0,                             /* DEF                                    */
                    173:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    174:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    175:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    176:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    177:   0, 0                           /* CLOSE, SKIPZERO                        */
                    178: };
                    179: 
                    180: /* This table identifies those opcodes that inspect a character. It is used to
                    181: remember the fact that a character could have been inspected when the end of
                    182: the subject is reached. ***NOTE*** If the start of this table is modified, the
                    183: two tables that follow must also be modified. */
                    184: 
                    185: static const uschar poptable[] = {  /* one entry per opcode, same order as coptable: 1 = the opcode inspects a character, 0 = it does not */
                    186:   0,                             /* End                                    */
                    187:   0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
                    188:   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
                    189:   1, 1, 1,                       /* Any, AllAny, Anybyte                   */
                    190:   1, 1,                          /* \P, \p                                 */
                    191:   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
                    192:   1,                             /* \X                                     */
                    193:   0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
                    194:   1,                             /* Char                                   */
                    195:   1,                             /* Chari                                  */
                    196:   1,                             /* not                                    */
                    197:   1,                             /* noti                                   */
                    198:   /* Positive single-char repeats                                          */
                    199:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    200:   1, 1, 1,                       /* upto, minupto, exact                   */
                    201:   1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
                    202:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
                    203:   1, 1, 1,                       /* upto I, minupto I, exact I             */
                    204:   1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
                    205:   /* Negative single-char repeats - only for chars < 256                   */
                    206:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
                    207:   1, 1, 1,                       /* NOT upto, minupto, exact               */
                    208:   1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
                    209:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
                    210:   1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
                    211:   1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
                    212:   /* Positive type repeats                                                 */
                    213:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
                    214:   1, 1, 1,                       /* Type upto, minupto, exact              */
                    215:   1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
                    216:   /* Character class & ref repeats                                         */
                    217:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    218:   1, 1,                          /* CRRANGE, CRMINRANGE                    */
                    219:   1,                             /* CLASS                                  */
                    220:   1,                             /* NCLASS                                 */
                    221:   1,                             /* XCLASS - variable length               */
                    222:   0,                             /* REF                                    */
                    223:   0,                             /* REFI                                   */
                    224:   0,                             /* RECURSE                                */
                    225:   0,                             /* CALLOUT                                */
                    226:   0,                             /* Alt                                    */
                    227:   0,                             /* Ket                                    */
                    228:   0,                             /* KetRmax                                */
                    229:   0,                             /* KetRmin                                */
                    230:   0,                             /* KetRpos                                */
                    231:   0,                             /* Reverse                                */
                    232:   0,                             /* Assert                                 */
                    233:   0,                             /* Assert not                             */
                    234:   0,                             /* Assert behind                          */
                    235:   0,                             /* Assert behind not                      */
                    236:   0, 0,                          /* ONCE, ONCE_NC                          */
                    237:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    238:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
                    239:   0, 0,                          /* CREF, NCREF                            */
                    240:   0, 0,                          /* RREF, NRREF                            */
                    241:   0,                             /* DEF                                    */
                    242:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    243:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    244:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    245:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    246:   0, 0                           /* CLOSE, SKIPZERO                        */
                    247: };
                    248: 
                    249: /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
                    250: and \w */
                    251: 
                    252: static const uschar toptable1[] = {  /* entries follow the opcode order at the start of coptable; NOTE(review): presumably the ctype bit mask applied to a character's class bits -- confirm at use site */
                    253:   0, 0, 0, 0, 0, 0,               /* End, \A, \G, \K, \B, \b */
                    254:   ctype_digit, ctype_digit,       /* \D, \d */
                    255:   ctype_space, ctype_space,       /* \S, \s */
                    256:   ctype_word,  ctype_word,        /* \W, \w */
                    257:   0, 0                            /* OP_ANY, OP_ALLANY */
                    258: };
                    259: 
                    260: static const uschar toptable2[] = {  /* same layout as toptable1; only the negated types (\D, \S, \W) carry the ctype bit here. NOTE(review): presumably XORed with the masked ctype value -- confirm at use site */
                    261:   0, 0, 0, 0, 0, 0,               /* End, \A, \G, \K, \B, \b */
                    262:   ctype_digit, 0,                 /* \D, \d */
                    263:   ctype_space, 0,                 /* \S, \s */
                    264:   ctype_word,  0,                 /* \W, \w */
                    265:   1, 1                            /* OP_ANY, OP_ALLANY */
                    266: };
                    267: 
                    268: 
                    269: /* Structure for holding data about a particular state, which is in effect the
                    270: current data for an active path through the match tree. It must consist
                    271: entirely of ints because the working vector we are passed, and which we put
                    272: these structures in, is a vector of ints. */
                    273: 
                    274: typedef struct stateblock {       /* ints only, so instances can be laid directly over the caller's int workspace */
                    275:   int offset;                     /* Offset to opcode */
                    276:   int count;                      /* Count for repeats */
                    277:   int data;                       /* Extra value used by some states; written by ADD_ACTIVE_DATA/ADD_NEW_DATA, left untouched by ADD_ACTIVE/ADD_NEW */
                    278: } stateblock;
                    279: 
                    280: #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  /* ints occupied by one stateblock; used to turn a workspace size in ints into a number of states */
                    281: 
                    282: 
                    283: #ifdef PCRE_DEBUG
                    284: /*************************************************
                    285: *             Print character string             *
                    286: *************************************************/
                    287: 
                    288: /* Character string printing function for debugging.
                    289: 
                    290: Arguments:
                    291:   p            points to string
                    292:   length       number of bytes
                    293:   f            where to print
                    294: 
                    295: Returns:       nothing
                    296: */
                    297: 
                    298: static void
                    299: pchars(unsigned char *p, int length, FILE *f)
                    300: {
                    301: int c;                         /* current byte, held as an int for isprint() and fprintf() */
                    302: while (length-- > 0)           /* a zero or negative length prints nothing */
                    303:   {
                    304:   if (isprint(c = *(p++)))     /* fetch the next byte and advance p */
                    305:     fprintf(f, "%c", c);       /* printable: written unchanged */
                    306:   else
                    307:     fprintf(f, "\\x%02x", c);  /* otherwise written as \x followed by the byte in lowercase hex, at least two digits */
                    308:   }
                    309: }
                    310: #endif
                    311: 
                    312: 
                    313: 
                    314: /*************************************************
                    315: *    Execute a Regular Expression - DFA engine   *
                    316: *************************************************/
                    317: 
                    318: /* This internal function applies a compiled pattern to a subject string,
                    319: starting at a given point, using a DFA engine. This function is called from the
                    320: external one, possibly multiple times if the pattern is not anchored. The
                    321: function calls itself recursively for some kinds of subpattern.
                    322: 
                    323: Arguments:
                    324:   md                the match_data block with fixed information
                    325:   this_start_code   the opening bracket of this subexpression's code
                    326:   current_subject   where we currently are in the subject string
                    327:   start_offset      start offset in the subject string
                    328:   offsets           vector to contain the matching string offsets
                    329:   offsetcount       size of same
                    330:   workspace         vector of workspace
                    331:   wscount           size of same
                    332:   rlevel            function call recursion level
                    333: 
                    334: Returns:            > 0 => number of match offset pairs placed in offsets
                    335:                     = 0 => offsets overflowed; longest matches are present
                    336:                      -1 => failed to match
                    337:                    < -1 => some kind of unexpected problem
                    338: 
                    339: The following macros are used for adding states to the two state vectors (one
                    340: for the current character, one for the following character). */
                    341: 
                    342: #define ADD_ACTIVE(x,y) /* append a state (offset x, count y) to the active list; its 'data' field is not written */ \
                    343:   if (active_count++ < wscount) /* the counter is incremented even when the list is already full */ \
                    344:     { \
                    345:     next_active_state->offset = (x); \
                    346:     next_active_state->count  = (y); \
                    347:     next_active_state++; \
                    348:     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    349:     } \
                    350:   else return PCRE_ERROR_DFA_WSSIZE  /* list full: the function that uses the macro returns this error; the caller supplies the ';' */
                    351: 
                    352: #define ADD_ACTIVE_DATA(x,y,z) /* as ADD_ACTIVE, but also stores z in the state's 'data' field */ \
                    353:   if (active_count++ < wscount) /* the counter is incremented even when the list is already full */ \
                    354:     { \
                    355:     next_active_state->offset = (x); \
                    356:     next_active_state->count  = (y); \
                    357:     next_active_state->data   = (z); \
                    358:     next_active_state++; \
                    359:     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
                    360:     } \
                    361:   else return PCRE_ERROR_DFA_WSSIZE  /* list full: the function that uses the macro returns this error; the caller supplies the ';' */
                    362: 
                    363: #define ADD_NEW(x,y) /* append a state (offset x, count y) to the new-state list; its 'data' field is not written */ \
                    364:   if (new_count++ < wscount) /* the counter is incremented even when the list is already full */ \
                    365:     { \
                    366:     next_new_state->offset = (x); \
                    367:     next_new_state->count  = (y); \
                    368:     next_new_state++; \
                    369:     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    370:     } \
                    371:   else return PCRE_ERROR_DFA_WSSIZE  /* list full: the function that uses the macro returns this error; the caller supplies the ';' */
                    372: 
                    373: #define ADD_NEW_DATA(x,y,z) /* as ADD_NEW, but also stores z in the state's 'data' field */ \
                    374:   if (new_count++ < wscount) /* the counter is incremented even when the list is already full */ \
                    375:     { \
                    376:     next_new_state->offset = (x); \
                    377:     next_new_state->count  = (y); \
                    378:     next_new_state->data   = (z); \
                    379:     next_new_state++; \
                    380:     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
                    381:     } \
                    382:   else return PCRE_ERROR_DFA_WSSIZE  /* list full: the function that uses the macro returns this error; the caller supplies the ';' */
                    383: 
                    384: /* And now, here is the code */
                    385: 
                    386: static int
                    387: internal_dfa_exec(
                    388:   dfa_match_data *md,
                    389:   const uschar *this_start_code,
                    390:   const uschar *current_subject,
                    391:   int start_offset,
                    392:   int *offsets,
                    393:   int offsetcount,
                    394:   int *workspace,
                    395:   int wscount,
                    396:   int  rlevel)
                    397: {
                    398: stateblock *active_states, *new_states, *temp_states;
                    399: stateblock *next_active_state, *next_new_state;
                    400: 
                    401: const uschar *ctypes, *lcc, *fcc;
                    402: const uschar *ptr;
                    403: const uschar *end_code, *first_op;
                    404: 
                    405: dfa_recursion_info new_recursive;
                    406: 
                    407: int active_count, new_count, match_count;
                    408: 
                    409: /* Some fields in the md block are frequently referenced, so we load them into
                    410: independent variables in the hope that this will perform better. */
                    411: 
                    412: const uschar *start_subject = md->start_subject;
                    413: const uschar *end_subject = md->end_subject;
                    414: const uschar *start_code = md->start_code;
                    415: 
                    416: #ifdef SUPPORT_UTF8
                    417: BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
                    418: #else
                    419: BOOL utf8 = FALSE;
                    420: #endif
                    421: 
                    422: rlevel++;
                    423: offsetcount &= (-2);
                    424: 
                    425: wscount -= 2;
                    426: wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
                    427:           (2 * INTS_PER_STATEBLOCK);
                    428: 
                    429: DPRINTF(("\n%.*s---------------------\n"
                    430:   "%.*sCall to internal_dfa_exec f=%d\n",
                    431:   rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
                    432: 
                    433: ctypes = md->tables + ctypes_offset;
                    434: lcc = md->tables + lcc_offset;
                    435: fcc = md->tables + fcc_offset;
                    436: 
                    437: match_count = PCRE_ERROR_NOMATCH;   /* A negative number */
                    438: 
                    439: active_states = (stateblock *)(workspace + 2);
                    440: next_new_state = new_states = active_states + wscount;
                    441: new_count = 0;
                    442: 
                    443: first_op = this_start_code + 1 + LINK_SIZE +
                    444:   ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
                    445:     *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);
                    446: 
                    447: /* The first thing in any (sub) pattern is a bracket of some sort. Push all
                    448: the alternative states onto the list, and find out where the end is. This
                    449: makes it possible to use this function recursively, when we want to stop at a
                    450: matching internal ket rather than at the end.
                    451: 
                    452: If the first opcode in the first alternative is OP_REVERSE, we are dealing with
                    453: a backward assertion. In that case, we have to find out the maximum amount to
                    454: move back, and set up each alternative appropriately. */
                    455: 
                    456: if (*first_op == OP_REVERSE)
                    457:   {
                    458:   int max_back = 0;
                    459:   int gone_back;
                    460: 
                    461:   end_code = this_start_code;
                    462:   do
                    463:     {
                    464:     int back = GET(end_code, 2+LINK_SIZE);
                    465:     if (back > max_back) max_back = back;
                    466:     end_code += GET(end_code, 1);
                    467:     }
                    468:   while (*end_code == OP_ALT);
                    469: 
                    470:   /* If we can't go back the amount required for the longest lookbehind
                    471:   pattern, go back as far as we can; some alternatives may still be viable. */
                    472: 
                    473: #ifdef SUPPORT_UTF8
                    474:   /* In character mode we have to step back character by character */
                    475: 
                    476:   if (utf8)
                    477:     {
                    478:     for (gone_back = 0; gone_back < max_back; gone_back++)
                    479:       {
                    480:       if (current_subject <= start_subject) break;
                    481:       current_subject--;
                    482:       while (current_subject > start_subject &&
                    483:              (*current_subject & 0xc0) == 0x80)
                    484:         current_subject--;
                    485:       }
                    486:     }
                    487:   else
                    488: #endif
                    489: 
                    490:   /* In byte-mode we can do this quickly. */
                    491: 
                    492:     {
                    493:     gone_back = (current_subject - max_back < start_subject)?
                    494:       (int)(current_subject - start_subject) : max_back;
                    495:     current_subject -= gone_back;
                    496:     }
                    497: 
                    498:   /* Save the earliest consulted character */
                    499: 
                    500:   if (current_subject < md->start_used_ptr)
                    501:     md->start_used_ptr = current_subject;
                    502: 
                    503:   /* Now we can process the individual branches. */
                    504: 
                    505:   end_code = this_start_code;
                    506:   do
                    507:     {
                    508:     int back = GET(end_code, 2+LINK_SIZE);
                    509:     if (back <= gone_back)
                    510:       {
                    511:       int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
                    512:       ADD_NEW_DATA(-bstate, 0, gone_back - back);
                    513:       }
                    514:     end_code += GET(end_code, 1);
                    515:     }
                    516:   while (*end_code == OP_ALT);
                    517:  }
                    518: 
                    519: /* This is the code for a "normal" subpattern (not a backward assertion). The
                    520: start of a whole pattern is always one of these. If we are at the top level,
                    521: we may be asked to restart matching from the same point that we reached for a
                    522: previous partial match. We still have to scan through the top-level branches to
                    523: find the end state. */
                    524: 
                    525: else
                    526:   {
                    527:   end_code = this_start_code;
                    528: 
                    529:   /* Restarting */
                    530: 
                    531:   if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
                    532:     {
                    533:     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
                    534:     new_count = workspace[1];
                    535:     if (!workspace[0])
                    536:       memcpy(new_states, active_states, new_count * sizeof(stateblock));
                    537:     }
                    538: 
                    539:   /* Not restarting */
                    540: 
                    541:   else
                    542:     {
                    543:     int length = 1 + LINK_SIZE +
                    544:       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
                    545:         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?
                    546:         2:0);
                    547:     do
                    548:       {
                    549:       ADD_NEW((int)(end_code - start_code + length), 0);
                    550:       end_code += GET(end_code, 1);
                    551:       length = 1 + LINK_SIZE;
                    552:       }
                    553:     while (*end_code == OP_ALT);
                    554:     }
                    555:   }
                    556: 
                    557: workspace[0] = 0;    /* Bit indicating which vector is current */
                    558: 
                    559: DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
                    560: 
                    561: /* Loop for scanning the subject */
                    562: 
                    563: ptr = current_subject;
                    564: for (;;)
                    565:   {
                    566:   int i, j;
                    567:   int clen, dlen;
                    568:   unsigned int c, d;
                    569:   int forced_fail = 0;
                    570:   BOOL could_continue = FALSE;
                    571: 
                    572:   /* Make the new state list into the active state list and empty the
                    573:   new state list. */
                    574: 
                    575:   temp_states = active_states;
                    576:   active_states = new_states;
                    577:   new_states = temp_states;
                    578:   active_count = new_count;
                    579:   new_count = 0;
                    580: 
                    581:   workspace[0] ^= 1;              /* Remember for the restarting feature */
                    582:   workspace[1] = active_count;
                    583: 
                    584: #ifdef PCRE_DEBUG
                    585:   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
                    586:   pchars((uschar *)ptr, strlen((char *)ptr), stdout);
                    587:   printf("\"\n");
                    588: 
                    589:   printf("%.*sActive states: ", rlevel*2-2, SP);
                    590:   for (i = 0; i < active_count; i++)
                    591:     printf("%d/%d ", active_states[i].offset, active_states[i].count);
                    592:   printf("\n");
                    593: #endif
                    594: 
                    595:   /* Set the pointers for adding new states */
                    596: 
                    597:   next_active_state = active_states + active_count;
                    598:   next_new_state = new_states;
                    599: 
                    600:   /* Load the current character from the subject outside the loop, as many
                    601:   different states may want to look at it, and we assume that at least one
                    602:   will. */
                    603: 
                    604:   if (ptr < end_subject)
                    605:     {
                    606:     clen = 1;        /* Number of bytes in the character */
                    607: #ifdef SUPPORT_UTF8
                    608:     if (utf8) { GETCHARLEN(c, ptr, clen); } else
                    609: #endif  /* SUPPORT_UTF8 */
                    610:     c = *ptr;
                    611:     }
                    612:   else
                    613:     {
                    614:     clen = 0;        /* This indicates the end of the subject */
                    615:     c = NOTACHAR;    /* This value should never actually be used */
                    616:     }
                    617: 
                    618:   /* Scan up the active states and act on each one. The result of an action
                    619:   may be to add more states to the currently active list (e.g. on hitting a
                    620:   parenthesis) or it may be to put states on the new list, for considering
                    621:   when we move the character pointer on. */
                    622: 
                    623:   for (i = 0; i < active_count; i++)
                    624:     {
                    625:     stateblock *current_state = active_states + i;
                    626:     BOOL caseless = FALSE;
                    627:     const uschar *code;
                    628:     int state_offset = current_state->offset;
                    629:     int count, codevalue, rrc;
                    630: 
                    631: #ifdef PCRE_DEBUG
                    632:     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
                    633:     if (clen == 0) printf("EOL\n");
                    634:       else if (c > 32 && c < 127) printf("'%c'\n", c);
                    635:         else printf("0x%02x\n", c);
                    636: #endif
                    637: 
                    638:     /* A negative offset is a special case meaning "hold off going to this
                    639:     (negated) state until the number of characters in the data field have
                    640:     been skipped". */
                    641: 
                    642:     if (state_offset < 0)
                    643:       {
                    644:       if (current_state->data > 0)
                    645:         {
                    646:         DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
                    647:         ADD_NEW_DATA(state_offset, current_state->count,
                    648:           current_state->data - 1);
                    649:         continue;
                    650:         }
                    651:       else
                    652:         {
                    653:         current_state->offset = state_offset = -state_offset;
                    654:         }
                    655:       }
                    656: 
                    657:     /* Check for a duplicate state with the same count, and skip if found.
                    658:     See the note at the head of this module about the possibility of improving
                    659:     performance here. */
                    660: 
                    661:     for (j = 0; j < i; j++)
                    662:       {
                    663:       if (active_states[j].offset == state_offset &&
                    664:           active_states[j].count == current_state->count)
                    665:         {
                    666:         DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
                    667:         goto NEXT_ACTIVE_STATE;
                    668:         }
                    669:       }
                    670: 
                    671:     /* The state offset is the offset to the opcode */
                    672: 
                    673:     code = start_code + state_offset;
                    674:     codevalue = *code;
                    675: 
                    676:     /* If this opcode inspects a character, but we are at the end of the
                    677:     subject, remember the fact for use when testing for a partial match. */
                    678: 
                    679:     if (clen == 0 && poptable[codevalue] != 0)
                    680:       could_continue = TRUE;
                    681: 
                    682:     /* If this opcode is followed by an inline character, load it. It is
                    683:     tempting to test for the presence of a subject character here, but that
                    684:     is wrong, because sometimes zero repetitions of the subject are
                    685:     permitted.
                    686: 
                    687:     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
                    688:     argument that is not a data character - but is always one byte long. We
                    689:     have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
                    690:     this case. To keep the other cases fast, convert these ones to new opcodes.
                    691:     */
                    692: 
                    693:     if (coptable[codevalue] > 0)
                    694:       {
                    695:       dlen = 1;
                    696: #ifdef SUPPORT_UTF8
                    697:       if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
                    698: #endif  /* SUPPORT_UTF8 */
                    699:       d = code[coptable[codevalue]];
                    700:       if (codevalue >= OP_TYPESTAR)
                    701:         {
                    702:         switch(d)
                    703:           {
                    704:           case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
                    705:           case OP_NOTPROP:
                    706:           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
                    707:           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
                    708:           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
                    709:           case OP_NOT_HSPACE:
                    710:           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
                    711:           case OP_NOT_VSPACE:
                    712:           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
                    713:           default: break;
                    714:           }
                    715:         }
                    716:       }
                    717:     else
                    718:       {
                    719:       dlen = 0;         /* Not strictly necessary, but compilers moan */
                    720:       d = NOTACHAR;     /* if these variables are not set. */
                    721:       }
                    722: 
                    723: 
                    724:     /* Now process the individual opcodes */
                    725: 
                    726:     switch (codevalue)
                    727:       {
                    728: /* ========================================================================== */
                    729:       /* These cases are never obeyed. This is a fudge that causes a compile-
                    730:       time error if the vectors coptable or poptable, which are indexed by
                    731:       opcode, are not the correct length. It seems to be the only way to do
                    732:       such a check at compile time, as the sizeof() operator does not work
                    733:       in the C preprocessor. */
                    734: 
                    735:       case OP_TABLE_LENGTH:
                    736:       case OP_TABLE_LENGTH +
                    737:         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
                    738:          (sizeof(poptable) == OP_TABLE_LENGTH)):
                    739:       break;
                    740: 
                    741: /* ========================================================================== */
                    742:       /* Reached a closing bracket. If not at the end of the pattern, carry
                    743:       on with the next opcode. For repeating opcodes, also add the repeat
                    744:       state. Note that KETRPOS will always be encountered at the end of the
                    745:       subpattern, because the possessive subpattern repeats are always handled
                    746:       using recursive calls. Thus, it never adds any new states.
                    747: 
                    748:       At the end of the (sub)pattern, unless we have an empty string and
                    749:       PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
                    750:       start of the subject, save the match data, shifting up all previous
                    751:       matches so we always have the longest first. */
                    752: 
                    753:       case OP_KET:
                    754:       case OP_KETRMIN:
                    755:       case OP_KETRMAX:
                    756:       case OP_KETRPOS:
                    757:       if (code != end_code)
                    758:         {
                    759:         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
                    760:         if (codevalue != OP_KET)
                    761:           {
                    762:           ADD_ACTIVE(state_offset - GET(code, 1), 0);
                    763:           }
                    764:         }
                    765:       else
                    766:         {
                    767:         if (ptr > current_subject ||
                    768:             ((md->moptions & PCRE_NOTEMPTY) == 0 &&
                    769:               ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
                    770:                 current_subject > start_subject + md->start_offset)))
                    771:           {
                    772:           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
                    773:             else if (match_count > 0 && ++match_count * 2 > offsetcount)
                    774:               match_count = 0;
                    775:           count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
                    776:           if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
                    777:           if (offsetcount >= 2)
                    778:             {
                    779:             offsets[0] = (int)(current_subject - start_subject);
                    780:             offsets[1] = (int)(ptr - start_subject);
                    781:             DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
                    782:               offsets[1] - offsets[0], current_subject));
                    783:             }
                    784:           if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
                    785:             {
                    786:             DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                    787:               "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
                    788:               match_count, rlevel*2-2, SP));
                    789:             return match_count;
                    790:             }
                    791:           }
                    792:         }
                    793:       break;
                    794: 
                    795: /* ========================================================================== */
                    796:       /* These opcodes add to the current list of states without looking
                    797:       at the current character. */
                    798: 
                    799:       /*-----------------------------------------------------------------*/
                    800:       case OP_ALT:
                    801:       do { code += GET(code, 1); } while (*code == OP_ALT);
                    802:       ADD_ACTIVE((int)(code - start_code), 0);
                    803:       break;
                    804: 
                    805:       /*-----------------------------------------------------------------*/
                    806:       case OP_BRA:
                    807:       case OP_SBRA:
                    808:       do
                    809:         {
                    810:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    811:         code += GET(code, 1);
                    812:         }
                    813:       while (*code == OP_ALT);
                    814:       break;
                    815: 
                    816:       /*-----------------------------------------------------------------*/
                    817:       case OP_CBRA:
                    818:       case OP_SCBRA:
                    819:       ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE),  0);
                    820:       code += GET(code, 1);
                    821:       while (*code == OP_ALT)
                    822:         {
                    823:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
                    824:         code += GET(code, 1);
                    825:         }
                    826:       break;
                    827: 
                    828:       /*-----------------------------------------------------------------*/
                    829:       case OP_BRAZERO:
                    830:       case OP_BRAMINZERO:
                    831:       ADD_ACTIVE(state_offset + 1, 0);
                    832:       code += 1 + GET(code, 2);
                    833:       while (*code == OP_ALT) code += GET(code, 1);
                    834:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    835:       break;
                    836: 
                    837:       /*-----------------------------------------------------------------*/
                    838:       case OP_SKIPZERO:
                    839:       code += 1 + GET(code, 2);
                    840:       while (*code == OP_ALT) code += GET(code, 1);
                    841:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    842:       break;
                    843: 
                    844:       /*-----------------------------------------------------------------*/
                    845:       case OP_CIRC:
                    846:       if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
                    847:         { ADD_ACTIVE(state_offset + 1, 0); }
                    848:       break;
                    849: 
                    850:       /*-----------------------------------------------------------------*/
                    851:       case OP_CIRCM:
                    852:       if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
                    853:           (ptr != end_subject && WAS_NEWLINE(ptr)))
                    854:         { ADD_ACTIVE(state_offset + 1, 0); }
                    855:       break;
                    856: 
                    857:       /*-----------------------------------------------------------------*/
                    858:       case OP_EOD:
                    859:       if (ptr >= end_subject)
                    860:         {
                    861:         if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    862:           could_continue = TRUE;
                    863:         else { ADD_ACTIVE(state_offset + 1, 0); }
                    864:         }
                    865:       break;
                    866: 
                    867:       /*-----------------------------------------------------------------*/
                    868:       case OP_SOD:
                    869:       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
                    870:       break;
                    871: 
                    872:       /*-----------------------------------------------------------------*/
                    873:       case OP_SOM:
                    874:       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
                    875:       break;
                    876: 
                    877: 
                    878: /* ========================================================================== */
                    879:       /* These opcodes inspect the next subject character, and sometimes
                    880:       the previous one as well, but do not have an argument. The variable
                    881:       clen contains the length of the current character and is zero if we are
                    882:       at the end of the subject. */
                    883: 
                    884:       /*-----------------------------------------------------------------*/
                    885:       case OP_ANY:
                    886:       if (clen > 0 && !IS_NEWLINE(ptr))
                    887:         { ADD_NEW(state_offset + 1, 0); }
                    888:       break;
                    889: 
                    890:       /*-----------------------------------------------------------------*/
                    891:       case OP_ALLANY:
                    892:       if (clen > 0)
                    893:         { ADD_NEW(state_offset + 1, 0); }
                    894:       break;
                    895: 
                    896:       /*-----------------------------------------------------------------*/
                    897:       case OP_EODN:
                    898:       if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    899:         could_continue = TRUE;
                    900:       else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
                    901:         { ADD_ACTIVE(state_offset + 1, 0); }
                    902:       break;
                    903: 
                    904:       /*-----------------------------------------------------------------*/
                    905:       case OP_DOLL:
                    906:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    907:         {
                    908:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    909:           could_continue = TRUE;
                    910:         else if (clen == 0 ||
                    911:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
                    912:                (ptr == end_subject - md->nllen)
                    913:             ))
                    914:           { ADD_ACTIVE(state_offset + 1, 0); }
                    915:         }
                    916:       break;
                    917: 
                    918:       /*-----------------------------------------------------------------*/
                    919:       case OP_DOLLM:
                    920:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    921:         {
                    922:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    923:           could_continue = TRUE;
                    924:         else if (clen == 0 ||
                    925:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
                    926:           { ADD_ACTIVE(state_offset + 1, 0); }
                    927:         }
                    928:       else if (IS_NEWLINE(ptr))
                    929:         { ADD_ACTIVE(state_offset + 1, 0); }
                    930:       break;
                    931: 
                    932:       /*-----------------------------------------------------------------*/
                    933: 
                    934:       case OP_DIGIT:
                    935:       case OP_WHITESPACE:
                    936:       case OP_WORDCHAR:
                    937:       if (clen > 0 && c < 256 &&
                    938:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
                    939:         { ADD_NEW(state_offset + 1, 0); }
                    940:       break;
                    941: 
                    942:       /*-----------------------------------------------------------------*/
                    943:       case OP_NOT_DIGIT:
                    944:       case OP_NOT_WHITESPACE:
                    945:       case OP_NOT_WORDCHAR:
                    946:       if (clen > 0 && (c >= 256 ||
                    947:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
                    948:         { ADD_NEW(state_offset + 1, 0); }
                    949:       break;
                    950: 
                    951:       /*-----------------------------------------------------------------*/
                    952:       case OP_WORD_BOUNDARY:
                    953:       case OP_NOT_WORD_BOUNDARY:
                    954:         {
                    955:         int left_word, right_word;
                    956: 
                    957:         if (ptr > start_subject)
                    958:           {
                    959:           const uschar *temp = ptr - 1;
                    960:           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
                    961: #ifdef SUPPORT_UTF8
                    962:           if (utf8) BACKCHAR(temp);
                    963: #endif
                    964:           GETCHARTEST(d, temp);
                    965: #ifdef SUPPORT_UCP
                    966:           if ((md->poptions & PCRE_UCP) != 0)
                    967:             {
                    968:             if (d == '_') left_word = TRUE; else
                    969:               {
                    970:               int cat = UCD_CATEGORY(d);
                    971:               left_word = (cat == ucp_L || cat == ucp_N);
                    972:               }
                    973:             }
                    974:           else
                    975: #endif
                    976:           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
                    977:           }
                    978:         else left_word = FALSE;
                    979: 
                    980:         if (clen > 0)
                    981:           {
                    982: #ifdef SUPPORT_UCP
                    983:           if ((md->poptions & PCRE_UCP) != 0)
                    984:             {
                    985:             if (c == '_') right_word = TRUE; else
                    986:               {
                    987:               int cat = UCD_CATEGORY(c);
                    988:               right_word = (cat == ucp_L || cat == ucp_N);
                    989:               }
                    990:             }
                    991:           else
                    992: #endif
                    993:           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
                    994:           }
                    995:         else right_word = FALSE;
                    996: 
                    997:         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
                    998:           { ADD_ACTIVE(state_offset + 1, 0); }
                    999:         }
                   1000:       break;
                   1001: 
                   1002: 
                   1003:       /*-----------------------------------------------------------------*/
                   1004:       /* Check the next character by Unicode property. We will get here only
                   1005:       if the support is in the binary; otherwise a compile-time error occurs.
                   1006:       */
                   1007: 
                   1008: #ifdef SUPPORT_UCP
                   1009:       case OP_PROP:
                   1010:       case OP_NOTPROP:
                   1011:       if (clen > 0)
                   1012:         {
                   1013:         BOOL OK;
                   1014:         const ucd_record * prop = GET_UCD(c);
                   1015:         switch(code[1])
                   1016:           {
                   1017:           case PT_ANY:
                   1018:           OK = TRUE;
                   1019:           break;
                   1020: 
                   1021:           case PT_LAMP:
                   1022:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1023:                prop->chartype == ucp_Lt;
                   1024:           break;
                   1025: 
                   1026:           case PT_GC:
                   1027:           OK = _pcre_ucp_gentype[prop->chartype] == code[2];
                   1028:           break;
                   1029: 
                   1030:           case PT_PC:
                   1031:           OK = prop->chartype == code[2];
                   1032:           break;
                   1033: 
                   1034:           case PT_SC:
                   1035:           OK = prop->script == code[2];
                   1036:           break;
                   1037: 
                   1038:           /* These are specials for combination cases. */
                   1039: 
                   1040:           case PT_ALNUM:
                   1041:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1042:                _pcre_ucp_gentype[prop->chartype] == ucp_N;
                   1043:           break;
                   1044: 
                   1045:           case PT_SPACE:    /* Perl space */
                   1046:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1047:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1048:           break;
                   1049: 
                   1050:           case PT_PXSPACE:  /* POSIX space */
                   1051:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1052:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1053:                c == CHAR_FF || c == CHAR_CR;
                   1054:           break;
                   1055: 
                   1056:           case PT_WORD:
                   1057:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1058:                _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   1059:                c == CHAR_UNDERSCORE;
                   1060:           break;
                   1061: 
                   1062:           /* Should never occur, but keep compilers from grumbling. */
                   1063: 
                   1064:           default:
                   1065:           OK = codevalue != OP_PROP;
                   1066:           break;
                   1067:           }
                   1068: 
                   1069:         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
                   1070:         }
                   1071:       break;
                   1072: #endif
                   1073: 
                   1074: 
                   1075: 
                   1076: /* ========================================================================== */
                   1077:       /* These opcodes likewise inspect the subject character, but have an
                   1078:       argument that is not a data character. It is one of these opcodes:
                   1079:       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
                   1080:       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
                   1081: 
                   1082:       case OP_TYPEPLUS:
                   1083:       case OP_TYPEMINPLUS:
                   1084:       case OP_TYPEPOSPLUS:
                   1085:       count = current_state->count;  /* Already matched */
                   1086:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1087:       if (clen > 0)
                   1088:         {
                   1089:         if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
                   1090:             (c < 256 &&
                   1091:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1092:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1093:           {
                   1094:           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
                   1095:             {
                   1096:             active_count--;            /* Remove non-match possibility */
                   1097:             next_active_state--;
                   1098:             }
                   1099:           count++;
                   1100:           ADD_NEW(state_offset, count);
                   1101:           }
                   1102:         }
                   1103:       break;
                   1104: 
                   1105:       /*-----------------------------------------------------------------*/
                   1106:       case OP_TYPEQUERY:
                   1107:       case OP_TYPEMINQUERY:
                   1108:       case OP_TYPEPOSQUERY:
                   1109:       ADD_ACTIVE(state_offset + 2, 0);
                   1110:       if (clen > 0)
                   1111:         {
                   1112:         if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
                   1113:             (c < 256 &&
                   1114:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1115:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1116:           {
                   1117:           if (codevalue == OP_TYPEPOSQUERY)
                   1118:             {
                   1119:             active_count--;            /* Remove non-match possibility */
                   1120:             next_active_state--;
                   1121:             }
                   1122:           ADD_NEW(state_offset + 2, 0);
                   1123:           }
                   1124:         }
                   1125:       break;
                   1126: 
                   1127:       /*-----------------------------------------------------------------*/
                   1128:       case OP_TYPESTAR:
                   1129:       case OP_TYPEMINSTAR:
                   1130:       case OP_TYPEPOSSTAR:
                   1131:       ADD_ACTIVE(state_offset + 2, 0);
                   1132:       if (clen > 0)
                   1133:         {
                   1134:         if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
                   1135:             (c < 256 &&
                   1136:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1137:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1138:           {
                   1139:           if (codevalue == OP_TYPEPOSSTAR)
                   1140:             {
                   1141:             active_count--;            /* Remove non-match possibility */
                   1142:             next_active_state--;
                   1143:             }
                   1144:           ADD_NEW(state_offset, 0);
                   1145:           }
                   1146:         }
                   1147:       break;
                   1148: 
                   1149:       /*-----------------------------------------------------------------*/
                   1150:       case OP_TYPEEXACT:
                   1151:       count = current_state->count;  /* Number already matched */
                   1152:       if (clen > 0)
                   1153:         {
                   1154:         if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
                   1155:             (c < 256 &&
                   1156:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1157:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1158:           {
                   1159:           if (++count >= GET2(code, 1))
                   1160:             { ADD_NEW(state_offset + 4, 0); }
                   1161:           else
                   1162:             { ADD_NEW(state_offset, count); }
                   1163:           }
                   1164:         }
                   1165:       break;
                   1166: 
                   1167:       /*-----------------------------------------------------------------*/
                   1168:       case OP_TYPEUPTO:
                   1169:       case OP_TYPEMINUPTO:
                   1170:       case OP_TYPEPOSUPTO:
                   1171:       ADD_ACTIVE(state_offset + 4, 0);
                   1172:       count = current_state->count;  /* Number already matched */
                   1173:       if (clen > 0)
                   1174:         {
                   1175:         if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
                   1176:             (c < 256 &&
                   1177:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1178:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1179:           {
                   1180:           if (codevalue == OP_TYPEPOSUPTO)
                   1181:             {
                   1182:             active_count--;           /* Remove non-match possibility */
                   1183:             next_active_state--;
                   1184:             }
                   1185:           if (++count >= GET2(code, 1))
                   1186:             { ADD_NEW(state_offset + 4, 0); }
                   1187:           else
                   1188:             { ADD_NEW(state_offset, count); }
                   1189:           }
                   1190:         }
                   1191:       break;
                   1192: 
                   1193: /* ========================================================================== */
                   1194:       /* These are virtual opcodes that are used when something like
                   1195:       OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, or one of the
                   1196:       OP_VSPACE/OP_HSPACE types as its argument. It keeps the code above fast
                   1197:       for the other cases. The argument is in the d variable. */
                   1198: 
                   1199: #ifdef SUPPORT_UCP
                   1200:       case OP_PROP_EXTRA + OP_TYPEPLUS:
                   1201:       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
                   1202:       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
                   1203:       count = current_state->count;           /* Already matched */
                   1204:       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
                   1205:       if (clen > 0)
                   1206:         {
                   1207:         BOOL OK;
                   1208:         const ucd_record * prop = GET_UCD(c);
                   1209:         switch(code[2])
                   1210:           {
                   1211:           case PT_ANY:
                   1212:           OK = TRUE;
                   1213:           break;
                   1214: 
                   1215:           case PT_LAMP:
                   1216:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1217:             prop->chartype == ucp_Lt;
                   1218:           break;
                   1219: 
                   1220:           case PT_GC:
                   1221:           OK = _pcre_ucp_gentype[prop->chartype] == code[3];
                   1222:           break;
                   1223: 
                   1224:           case PT_PC:
                   1225:           OK = prop->chartype == code[3];
                   1226:           break;
                   1227: 
                   1228:           case PT_SC:
                   1229:           OK = prop->script == code[3];
                   1230:           break;
                   1231: 
                   1232:           /* These are specials for combination cases. */
                   1233: 
                   1234:           case PT_ALNUM:
                   1235:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1236:                _pcre_ucp_gentype[prop->chartype] == ucp_N;
                   1237:           break;
                   1238: 
                   1239:           case PT_SPACE:    /* Perl space */
                   1240:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1241:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1242:           break;
                   1243: 
                   1244:           case PT_PXSPACE:  /* POSIX space */
                   1245:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1246:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1247:                c == CHAR_FF || c == CHAR_CR;
                   1248:           break;
                   1249: 
                   1250:           case PT_WORD:
                   1251:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1252:                _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   1253:                c == CHAR_UNDERSCORE;
                   1254:           break;
                   1255: 
                   1256:           /* Should never occur, but keep compilers from grumbling. */
                   1257: 
                   1258:           default:
                   1259:           OK = codevalue != OP_PROP;
                   1260:           break;
                   1261:           }
                   1262: 
                   1263:         if (OK == (d == OP_PROP))
                   1264:           {
                   1265:           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
                   1266:             {
                   1267:             active_count--;           /* Remove non-match possibility */
                   1268:             next_active_state--;
                   1269:             }
                   1270:           count++;
                   1271:           ADD_NEW(state_offset, count);
                   1272:           }
                   1273:         }
                   1274:       break;
                   1275: 
                   1276:       /*-----------------------------------------------------------------*/
                   1277:       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
                   1278:       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
                   1279:       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
                   1280:       count = current_state->count;  /* Already matched */
                   1281:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1282:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   1283:         {
                   1284:         const uschar *nptr = ptr + clen;
                   1285:         int ncount = 0;
                   1286:         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
                   1287:           {
                   1288:           active_count--;           /* Remove non-match possibility */
                   1289:           next_active_state--;
                   1290:           }
                   1291:         while (nptr < end_subject)
                   1292:           {
                   1293:           int nd;
                   1294:           int ndlen = 1;
                   1295:           GETCHARLEN(nd, nptr, ndlen);
                   1296:           if (UCD_CATEGORY(nd) != ucp_M) break;
                   1297:           ncount++;
                   1298:           nptr += ndlen;
                   1299:           }
                   1300:         count++;
                   1301:         ADD_NEW_DATA(-state_offset, count, ncount);
                   1302:         }
                   1303:       break;
                   1304: #endif
                   1305: 
                   1306:       /*-----------------------------------------------------------------*/
                   1307:       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
                   1308:       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
                   1309:       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
                   1310:       count = current_state->count;  /* Already matched */
                   1311:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1312:       if (clen > 0)
                   1313:         {
                   1314:         int ncount = 0;
                   1315:         switch (c)
                   1316:           {
                   1317:           case 0x000b:
                   1318:           case 0x000c:
                   1319:           case 0x0085:
                   1320:           case 0x2028:
                   1321:           case 0x2029:
                   1322:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1323:           goto ANYNL01;
                   1324: 
                   1325:           case 0x000d:
                   1326:           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
                   1327:           /* Fall through */
                   1328: 
                   1329:           ANYNL01:
                   1330:           case 0x000a:
                   1331:           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
                   1332:             {
                   1333:             active_count--;           /* Remove non-match possibility */
                   1334:             next_active_state--;
                   1335:             }
                   1336:           count++;
                   1337:           ADD_NEW_DATA(-state_offset, count, ncount);
                   1338:           break;
                   1339: 
                   1340:           default:
                   1341:           break;
                   1342:           }
                   1343:         }
                   1344:       break;
                   1345: 
                   1346:       /*-----------------------------------------------------------------*/
                   1347:       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
                   1348:       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
                   1349:       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1350:       count = current_state->count;  /* Already matched */
                   1351:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1352:       if (clen > 0)
                   1353:         {
                   1354:         BOOL OK;
                   1355:         switch (c)
                   1356:           {
                   1357:           case 0x000a:
                   1358:           case 0x000b:
                   1359:           case 0x000c:
                   1360:           case 0x000d:
                   1361:           case 0x0085:
                   1362:           case 0x2028:
                   1363:           case 0x2029:
                   1364:           OK = TRUE;
                   1365:           break;
                   1366: 
                   1367:           default:
                   1368:           OK = FALSE;
                   1369:           break;
                   1370:           }
                   1371: 
                   1372:         if (OK == (d == OP_VSPACE))
                   1373:           {
                   1374:           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1375:             {
                   1376:             active_count--;           /* Remove non-match possibility */
                   1377:             next_active_state--;
                   1378:             }
                   1379:           count++;
                   1380:           ADD_NEW_DATA(-state_offset, count, 0);
                   1381:           }
                   1382:         }
                   1383:       break;
                   1384: 
                   1385:       /*-----------------------------------------------------------------*/
                   1386:       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
                   1387:       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
                   1388:       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1389:       count = current_state->count;  /* Already matched */
                   1390:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1391:       if (clen > 0)
                   1392:         {
                   1393:         BOOL OK;
                   1394:         switch (c)
                   1395:           {
                   1396:           case 0x09:      /* HT */
                   1397:           case 0x20:      /* SPACE */
                   1398:           case 0xa0:      /* NBSP */
                   1399:           case 0x1680:    /* OGHAM SPACE MARK */
                   1400:           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1401:           case 0x2000:    /* EN QUAD */
                   1402:           case 0x2001:    /* EM QUAD */
                   1403:           case 0x2002:    /* EN SPACE */
                   1404:           case 0x2003:    /* EM SPACE */
                   1405:           case 0x2004:    /* THREE-PER-EM SPACE */
                   1406:           case 0x2005:    /* FOUR-PER-EM SPACE */
                   1407:           case 0x2006:    /* SIX-PER-EM SPACE */
                   1408:           case 0x2007:    /* FIGURE SPACE */
                   1409:           case 0x2008:    /* PUNCTUATION SPACE */
                   1410:           case 0x2009:    /* THIN SPACE */
                   1411:           case 0x200A:    /* HAIR SPACE */
                   1412:           case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1413:           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1414:           case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1415:           OK = TRUE;
                   1416:           break;
                   1417: 
                   1418:           default:
                   1419:           OK = FALSE;
                   1420:           break;
                   1421:           }
                   1422: 
                   1423:         if (OK == (d == OP_HSPACE))
                   1424:           {
                   1425:           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1426:             {
                   1427:             active_count--;           /* Remove non-match possibility */
                   1428:             next_active_state--;
                   1429:             }
                   1430:           count++;
                   1431:           ADD_NEW_DATA(-state_offset, count, 0);
                   1432:           }
                   1433:         }
                   1434:       break;
                   1435: 
                   1436:       /*-----------------------------------------------------------------*/
                   1437: #ifdef SUPPORT_UCP
                   1438:       case OP_PROP_EXTRA + OP_TYPEQUERY:
                   1439:       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
                   1440:       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
                   1441:       count = 4;
                   1442:       goto QS1;
                   1443: 
                   1444:       case OP_PROP_EXTRA + OP_TYPESTAR:
                   1445:       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
                   1446:       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
                   1447:       count = 0;
                   1448: 
                   1449:       QS1:
                   1450: 
                   1451:       ADD_ACTIVE(state_offset + 4, 0);
                   1452:       if (clen > 0)
                   1453:         {
                   1454:         BOOL OK;
                   1455:         const ucd_record * prop = GET_UCD(c);
                   1456:         switch(code[2])
                   1457:           {
                   1458:           case PT_ANY:
                   1459:           OK = TRUE;
                   1460:           break;
                   1461: 
                   1462:           case PT_LAMP:
                   1463:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1464:             prop->chartype == ucp_Lt;
                   1465:           break;
                   1466: 
                   1467:           case PT_GC:
                   1468:           OK = _pcre_ucp_gentype[prop->chartype] == code[3];
                   1469:           break;
                   1470: 
                   1471:           case PT_PC:
                   1472:           OK = prop->chartype == code[3];
                   1473:           break;
                   1474: 
                   1475:           case PT_SC:
                   1476:           OK = prop->script == code[3];
                   1477:           break;
                   1478: 
                   1479:           /* These are specials for combination cases. */
                   1480: 
                   1481:           case PT_ALNUM:
                   1482:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1483:                _pcre_ucp_gentype[prop->chartype] == ucp_N;
                   1484:           break;
                   1485: 
                   1486:           case PT_SPACE:    /* Perl space */
                   1487:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1488:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1489:           break;
                   1490: 
                   1491:           case PT_PXSPACE:  /* POSIX space */
                   1492:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1493:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1494:                c == CHAR_FF || c == CHAR_CR;
                   1495:           break;
                   1496: 
                   1497:           case PT_WORD:
                   1498:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1499:                _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   1500:                c == CHAR_UNDERSCORE;
                   1501:           break;
                   1502: 
                   1503:           /* Should never occur, but keep compilers from grumbling. */
                   1504: 
                   1505:           default:
                   1506:           OK = codevalue != OP_PROP;
                   1507:           break;
                   1508:           }
                   1509: 
                   1510:         if (OK == (d == OP_PROP))
                   1511:           {
                   1512:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
                   1513:               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
                   1514:             {
                   1515:             active_count--;           /* Remove non-match possibility */
                   1516:             next_active_state--;
                   1517:             }
                   1518:           ADD_NEW(state_offset + count, 0);
                   1519:           }
                   1520:         }
                   1521:       break;
                   1522: 
                   1523:       /*-----------------------------------------------------------------*/
                   1524:       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
                   1525:       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
                   1526:       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
                   1527:       count = 2;
                   1528:       goto QS2;
                   1529: 
                   1530:       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
                   1531:       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
                   1532:       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
                   1533:       count = 0;
                   1534: 
                   1535:       QS2:
                   1536: 
                   1537:       ADD_ACTIVE(state_offset + 2, 0);
                   1538:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   1539:         {
                   1540:         const uschar *nptr = ptr + clen;
                   1541:         int ncount = 0;
                   1542:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
                   1543:             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
                   1544:           {
                   1545:           active_count--;           /* Remove non-match possibility */
                   1546:           next_active_state--;
                   1547:           }
                   1548:         while (nptr < end_subject)
                   1549:           {
                   1550:           int nd;
                   1551:           int ndlen = 1;
                   1552:           GETCHARLEN(nd, nptr, ndlen);
                   1553:           if (UCD_CATEGORY(nd) != ucp_M) break;
                   1554:           ncount++;
                   1555:           nptr += ndlen;
                   1556:           }
                   1557:         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
                   1558:         }
                   1559:       break;
                   1560: #endif
                   1561: 
                   1562:       /*-----------------------------------------------------------------*/
                   1563:       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
                   1564:       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
                   1565:       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
                   1566:       count = 2;
                   1567:       goto QS3;
                   1568: 
                   1569:       case OP_ANYNL_EXTRA + OP_TYPESTAR:
                   1570:       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
                   1571:       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
                   1572:       count = 0;
                   1573: 
                   1574:       QS3:
                   1575:       ADD_ACTIVE(state_offset + 2, 0);
                   1576:       if (clen > 0)
                   1577:         {
                   1578:         int ncount = 0;
                   1579:         switch (c)
                   1580:           {
                   1581:           case 0x000b:
                   1582:           case 0x000c:
                   1583:           case 0x0085:
                   1584:           case 0x2028:
                   1585:           case 0x2029:
                   1586:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1587:           goto ANYNL02;
                   1588: 
                   1589:           case 0x000d:
                   1590:           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
                   1591:           /* Fall through */
                   1592: 
                   1593:           ANYNL02:
                   1594:           case 0x000a:
                   1595:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
                   1596:               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
                   1597:             {
                   1598:             active_count--;           /* Remove non-match possibility */
                   1599:             next_active_state--;
                   1600:             }
                   1601:           ADD_NEW_DATA(-(state_offset + count), 0, ncount);
                   1602:           break;
                   1603: 
                   1604:           default:
                   1605:           break;
                   1606:           }
                   1607:         }
                   1608:       break;
                   1609: 
                   1610:       /*-----------------------------------------------------------------*/
                   1611:       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
                   1612:       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
                   1613:       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1614:       count = 2;
                   1615:       goto QS4;
                   1616: 
                   1617:       case OP_VSPACE_EXTRA + OP_TYPESTAR:
                   1618:       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
                   1619:       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1620:       count = 0;
                   1621: 
                   1622:       QS4:
                   1623:       ADD_ACTIVE(state_offset + 2, 0);
                   1624:       if (clen > 0)
                   1625:         {
                   1626:         BOOL OK;
                   1627:         switch (c)
                   1628:           {
                   1629:           case 0x000a:
                   1630:           case 0x000b:
                   1631:           case 0x000c:
                   1632:           case 0x000d:
                   1633:           case 0x0085:
                   1634:           case 0x2028:
                   1635:           case 0x2029:
                   1636:           OK = TRUE;
                   1637:           break;
                   1638: 
                   1639:           default:
                   1640:           OK = FALSE;
                   1641:           break;
                   1642:           }
                   1643:         if (OK == (d == OP_VSPACE))
                   1644:           {
                   1645:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1646:               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1647:             {
                   1648:             active_count--;           /* Remove non-match possibility */
                   1649:             next_active_state--;
                   1650:             }
                   1651:           ADD_NEW_DATA(-(state_offset + count), 0, 0);
                   1652:           }
                   1653:         }
                   1654:       break;
                   1655: 
                   1656:       /*-----------------------------------------------------------------*/
                   1657:       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
                   1658:       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
                   1659:       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1660:       count = 2;
                   1661:       goto QS5;
                   1662: 
                   1663:       case OP_HSPACE_EXTRA + OP_TYPESTAR:
                   1664:       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
                   1665:       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1666:       count = 0;
                   1667: 
                   1668:       QS5:
                   1669:       ADD_ACTIVE(state_offset + 2, 0);
                   1670:       if (clen > 0)
                   1671:         {
                   1672:         BOOL OK;
                   1673:         switch (c)
                   1674:           {
                   1675:           case 0x09:      /* HT */
                   1676:           case 0x20:      /* SPACE */
                   1677:           case 0xa0:      /* NBSP */
                   1678:           case 0x1680:    /* OGHAM SPACE MARK */
                   1679:           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1680:           case 0x2000:    /* EN QUAD */
                   1681:           case 0x2001:    /* EM QUAD */
                   1682:           case 0x2002:    /* EN SPACE */
                   1683:           case 0x2003:    /* EM SPACE */
                   1684:           case 0x2004:    /* THREE-PER-EM SPACE */
                   1685:           case 0x2005:    /* FOUR-PER-EM SPACE */
                   1686:           case 0x2006:    /* SIX-PER-EM SPACE */
                   1687:           case 0x2007:    /* FIGURE SPACE */
                   1688:           case 0x2008:    /* PUNCTUATION SPACE */
                   1689:           case 0x2009:    /* THIN SPACE */
                   1690:           case 0x200A:    /* HAIR SPACE */
                   1691:           case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1692:           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1693:           case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1694:           OK = TRUE;
                   1695:           break;
                   1696: 
                   1697:           default:
                   1698:           OK = FALSE;
                   1699:           break;
                   1700:           }
                   1701: 
                   1702:         if (OK == (d == OP_HSPACE))
                   1703:           {
                   1704:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1705:               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1706:             {
                   1707:             active_count--;           /* Remove non-match possibility */
                   1708:             next_active_state--;
                   1709:             }
                   1710:           ADD_NEW_DATA(-(state_offset + count), 0, 0);
                   1711:           }
                   1712:         }
                   1713:       break;
                   1714: 
                   1715:       /*-----------------------------------------------------------------*/
                   1716: #ifdef SUPPORT_UCP
                   1717:       case OP_PROP_EXTRA + OP_TYPEEXACT:
                   1718:       case OP_PROP_EXTRA + OP_TYPEUPTO:
                   1719:       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
                   1720:       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
                   1721:       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
                   1722:         { ADD_ACTIVE(state_offset + 6, 0); }
                   1723:       count = current_state->count;  /* Number already matched */
                   1724:       if (clen > 0)
                   1725:         {
                   1726:         BOOL OK;
                   1727:         const ucd_record * prop = GET_UCD(c);
                   1728:         switch(code[4])
                   1729:           {
                   1730:           case PT_ANY:
                   1731:           OK = TRUE;
                   1732:           break;
                   1733: 
                   1734:           case PT_LAMP:
                   1735:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1736:             prop->chartype == ucp_Lt;
                   1737:           break;
                   1738: 
                   1739:           case PT_GC:
                   1740:           OK = _pcre_ucp_gentype[prop->chartype] == code[5];
                   1741:           break;
                   1742: 
                   1743:           case PT_PC:
                   1744:           OK = prop->chartype == code[5];
                   1745:           break;
                   1746: 
                   1747:           case PT_SC:
                   1748:           OK = prop->script == code[5];
                   1749:           break;
                   1750: 
                   1751:           /* These are specials for combination cases. */
                   1752: 
                   1753:           case PT_ALNUM:
                   1754:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1755:                _pcre_ucp_gentype[prop->chartype] == ucp_N;
                   1756:           break;
                   1757: 
                   1758:           case PT_SPACE:    /* Perl space */
                   1759:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1760:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1761:           break;
                   1762: 
                   1763:           case PT_PXSPACE:  /* POSIX space */
                   1764:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   1765:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1766:                c == CHAR_FF || c == CHAR_CR;
                   1767:           break;
                   1768: 
                   1769:           case PT_WORD:
                   1770:           OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   1771:                _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   1772:                c == CHAR_UNDERSCORE;
                   1773:           break;
                   1774: 
                   1775:           /* Should never occur, but keep compilers from grumbling. */
                   1776: 
                   1777:           default:
                   1778:           OK = codevalue != OP_PROP;
                   1779:           break;
                   1780:           }
                   1781: 
                   1782:         if (OK == (d == OP_PROP))
                   1783:           {
                   1784:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
                   1785:             {
                   1786:             active_count--;           /* Remove non-match possibility */
                   1787:             next_active_state--;
                   1788:             }
                   1789:           if (++count >= GET2(code, 1))
                   1790:             { ADD_NEW(state_offset + 6, 0); }
                   1791:           else
                   1792:             { ADD_NEW(state_offset, count); }
                   1793:           }
                   1794:         }
                   1795:       break;
                   1796: 
                   1797:       /*-----------------------------------------------------------------*/
                   1798:       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
                   1799:       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
                   1800:       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
                   1801:       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
                   1802:       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
                   1803:         { ADD_ACTIVE(state_offset + 4, 0); }
                   1804:       count = current_state->count;  /* Number already matched */
                   1805:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   1806:         {
                   1807:         const uschar *nptr = ptr + clen;
                   1808:         int ncount = 0;
                   1809:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
                   1810:           {
                   1811:           active_count--;           /* Remove non-match possibility */
                   1812:           next_active_state--;
                   1813:           }
                   1814:         while (nptr < end_subject)
                   1815:           {
                   1816:           int nd;
                   1817:           int ndlen = 1;
                   1818:           GETCHARLEN(nd, nptr, ndlen);
                   1819:           if (UCD_CATEGORY(nd) != ucp_M) break;
                   1820:           ncount++;
                   1821:           nptr += ndlen;
                   1822:           }
                   1823:         if (++count >= GET2(code, 1))
                   1824:           { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
                   1825:         else
                   1826:           { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1827:         }
                   1828:       break;
                   1829: #endif
                   1830: 
                   1831:       /*-----------------------------------------------------------------*/
                   1832:       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
                   1833:       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
                   1834:       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
                   1835:       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
                   1836:       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
                   1837:         { ADD_ACTIVE(state_offset + 4, 0); }
                   1838:       count = current_state->count;  /* Number already matched */
                   1839:       if (clen > 0)
                   1840:         {
                   1841:         int ncount = 0;
                   1842:         switch (c)
                   1843:           {
                   1844:           case 0x000b:
                   1845:           case 0x000c:
                   1846:           case 0x0085:
                   1847:           case 0x2028:
                   1848:           case 0x2029:
                   1849:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1850:           goto ANYNL03;
                   1851: 
                   1852:           case 0x000d:
                   1853:           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
                   1854:           /* Fall through */
                   1855: 
                   1856:           ANYNL03:
                   1857:           case 0x000a:
                   1858:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
                   1859:             {
                   1860:             active_count--;           /* Remove non-match possibility */
                   1861:             next_active_state--;
                   1862:             }
                   1863:           if (++count >= GET2(code, 1))
                   1864:             { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
                   1865:           else
                   1866:             { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1867:           break;
                   1868: 
                   1869:           default:
                   1870:           break;
                   1871:           }
                   1872:         }
                   1873:       break;
                   1874: 
                   1875:       /*-----------------------------------------------------------------*/
                   1876:       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
                   1877:       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
                   1878:       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
                   1879:       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
                   1880:       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
                   1881:         { ADD_ACTIVE(state_offset + 4, 0); }
                   1882:       count = current_state->count;  /* Number already matched */
                   1883:       if (clen > 0)
                   1884:         {
                   1885:         BOOL OK;
                   1886:         switch (c)
                   1887:           {
                   1888:           case 0x000a:
                   1889:           case 0x000b:
                   1890:           case 0x000c:
                   1891:           case 0x000d:
                   1892:           case 0x0085:
                   1893:           case 0x2028:
                   1894:           case 0x2029:
                   1895:           OK = TRUE;
                   1896:           break;
                   1897: 
                   1898:           default:
                   1899:           OK = FALSE;
                   1900:           }
                   1901: 
                   1902:         if (OK == (d == OP_VSPACE))
                   1903:           {
                   1904:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
                   1905:             {
                   1906:             active_count--;           /* Remove non-match possibility */
                   1907:             next_active_state--;
                   1908:             }
                   1909:           if (++count >= GET2(code, 1))
                   1910:             { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
                   1911:           else
                   1912:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   1913:           }
                   1914:         }
                   1915:       break;
                   1916: 
                   1917:       /*-----------------------------------------------------------------*/
                   1918:       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
                   1919:       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
                   1920:       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
                   1921:       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
                   1922:       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
                   1923:         { ADD_ACTIVE(state_offset + 4, 0); }
                   1924:       count = current_state->count;  /* Number already matched */
                   1925:       if (clen > 0)
                   1926:         {
                   1927:         BOOL OK;
                   1928:         switch (c)
                   1929:           {
                   1930:           case 0x09:      /* HT */
                   1931:           case 0x20:      /* SPACE */
                   1932:           case 0xa0:      /* NBSP */
                   1933:           case 0x1680:    /* OGHAM SPACE MARK */
                   1934:           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1935:           case 0x2000:    /* EN QUAD */
                   1936:           case 0x2001:    /* EM QUAD */
                   1937:           case 0x2002:    /* EN SPACE */
                   1938:           case 0x2003:    /* EM SPACE */
                   1939:           case 0x2004:    /* THREE-PER-EM SPACE */
                   1940:           case 0x2005:    /* FOUR-PER-EM SPACE */
                   1941:           case 0x2006:    /* SIX-PER-EM SPACE */
                   1942:           case 0x2007:    /* FIGURE SPACE */
                   1943:           case 0x2008:    /* PUNCTUATION SPACE */
                   1944:           case 0x2009:    /* THIN SPACE */
                   1945:           case 0x200A:    /* HAIR SPACE */
                   1946:           case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1947:           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1948:           case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1949:           OK = TRUE;
                   1950:           break;
                   1951: 
                   1952:           default:
                   1953:           OK = FALSE;
                   1954:           break;
                   1955:           }
                   1956: 
                   1957:         if (OK == (d == OP_HSPACE))
                   1958:           {
                   1959:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
                   1960:             {
                   1961:             active_count--;           /* Remove non-match possibility */
                   1962:             next_active_state--;
                   1963:             }
                   1964:           if (++count >= GET2(code, 1))
                   1965:             { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
                   1966:           else
                   1967:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   1968:           }
                   1969:         }
                   1970:       break;
                   1971: 
                   1972: /* ========================================================================== */
                   1973:       /* These opcodes are followed by a character that is usually compared
                   1974:       to the current subject character; it is loaded into d. We still get
                   1975:       here even if there is no subject character, because in some cases zero
                   1976:       repetitions are permitted. */
                   1977: 
                   1978:       /*-----------------------------------------------------------------*/
                   1979:       case OP_CHAR:
                   1980:       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   1981:       break;
                   1982: 
                   1983:       /*-----------------------------------------------------------------*/
                   1984:       case OP_CHARI:
                   1985:       if (clen == 0) break;
                   1986: 
                   1987: #ifdef SUPPORT_UTF8
                   1988:       if (utf8)
                   1989:         {
                   1990:         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
                   1991:           {
                   1992:           unsigned int othercase;
                   1993:           if (c < 128) othercase = fcc[c]; else
                   1994: 
                   1995:           /* If we have Unicode property support, we can use it to test the
                   1996:           other case of the character. */
                   1997: 
                   1998: #ifdef SUPPORT_UCP
                   1999:           othercase = UCD_OTHERCASE(c);
                   2000: #else
                   2001:           othercase = NOTACHAR;
                   2002: #endif
                   2003: 
                   2004:           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2005:           }
                   2006:         }
                   2007:       else
                   2008: #endif  /* SUPPORT_UTF8 */
                   2009: 
                   2010:       /* Non-UTF-8 mode */
                   2011:         {
                   2012:         if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
                   2013:         }
                   2014:       break;
                   2015: 
                   2016: 
                   2017: #ifdef SUPPORT_UCP
                   2018:       /*-----------------------------------------------------------------*/
                   2019:       /* This is a tricky one because it can match more than one character.
                   2020:       Find out how many characters to skip, and then set up a negative state
                   2021:       to wait for them to pass before continuing. */
                   2022: 
                   2023:       case OP_EXTUNI:
                   2024:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   2025:         {
                   2026:         const uschar *nptr = ptr + clen;
                   2027:         int ncount = 0;
                   2028:         while (nptr < end_subject)
                   2029:           {
                   2030:           int nclen = 1;
                   2031:           GETCHARLEN(c, nptr, nclen);
                   2032:           if (UCD_CATEGORY(c) != ucp_M) break;
                   2033:           ncount++;
                   2034:           nptr += nclen;
                   2035:           }
                   2036:         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
                   2037:         }
                   2038:       break;
                   2039: #endif
                   2040: 
                   2041:       /*-----------------------------------------------------------------*/
                   2042:       /* This is tricky like EXTUNI because it too can match more than one
                   2043:       character (when CR is followed by LF). In this case, set up a negative
                   2044:       state to wait for one character to pass before continuing. */
                   2045: 
                   2046:       case OP_ANYNL:
                   2047:       if (clen > 0) switch(c)
                   2048:         {
                   2049:         case 0x000b:
                   2050:         case 0x000c:
                   2051:         case 0x0085:
                   2052:         case 0x2028:
                   2053:         case 0x2029:
                   2054:         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   2055: 
                   2056:         case 0x000a:
                   2057:         ADD_NEW(state_offset + 1, 0);
                   2058:         break;
                   2059: 
                   2060:         case 0x000d:
                   2061:         if (ptr + 1 < end_subject && ptr[1] == 0x0a)
                   2062:           {
                   2063:           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                   2064:           }
                   2065:         else
                   2066:           {
                   2067:           ADD_NEW(state_offset + 1, 0);
                   2068:           }
                   2069:         break;
                   2070:         }
                   2071:       break;
                   2072: 
                   2073:       /*-----------------------------------------------------------------*/
                   2074:       case OP_NOT_VSPACE:
                   2075:       if (clen > 0) switch(c)
                   2076:         {
                   2077:         case 0x000a:
                   2078:         case 0x000b:
                   2079:         case 0x000c:
                   2080:         case 0x000d:
                   2081:         case 0x0085:
                   2082:         case 0x2028:
                   2083:         case 0x2029:
                   2084:         break;
                   2085: 
                   2086:         default:
                   2087:         ADD_NEW(state_offset + 1, 0);
                   2088:         break;
                   2089:         }
                   2090:       break;
                   2091: 
                   2092:       /*-----------------------------------------------------------------*/
                   2093:       case OP_VSPACE:
                   2094:       if (clen > 0) switch(c)
                   2095:         {
                   2096:         case 0x000a:
                   2097:         case 0x000b:
                   2098:         case 0x000c:
                   2099:         case 0x000d:
                   2100:         case 0x0085:
                   2101:         case 0x2028:
                   2102:         case 0x2029:
                   2103:         ADD_NEW(state_offset + 1, 0);
                   2104:         break;
                   2105: 
                   2106:         default: break;
                   2107:         }
                   2108:       break;
                   2109: 
                   2110:       /*-----------------------------------------------------------------*/
                   2111:       case OP_NOT_HSPACE:
                   2112:       if (clen > 0) switch(c)
                   2113:         {
                   2114:         case 0x09:      /* HT */
                   2115:         case 0x20:      /* SPACE */
                   2116:         case 0xa0:      /* NBSP */
                   2117:         case 0x1680:    /* OGHAM SPACE MARK */
                   2118:         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2119:         case 0x2000:    /* EN QUAD */
                   2120:         case 0x2001:    /* EM QUAD */
                   2121:         case 0x2002:    /* EN SPACE */
                   2122:         case 0x2003:    /* EM SPACE */
                   2123:         case 0x2004:    /* THREE-PER-EM SPACE */
                   2124:         case 0x2005:    /* FOUR-PER-EM SPACE */
                   2125:         case 0x2006:    /* SIX-PER-EM SPACE */
                   2126:         case 0x2007:    /* FIGURE SPACE */
                   2127:         case 0x2008:    /* PUNCTUATION SPACE */
                   2128:         case 0x2009:    /* THIN SPACE */
                   2129:         case 0x200A:    /* HAIR SPACE */
                   2130:         case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2131:         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2132:         case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2133:         break;
                   2134: 
                   2135:         default:
                   2136:         ADD_NEW(state_offset + 1, 0);
                   2137:         break;
                   2138:         }
                   2139:       break;
                   2140: 
                   2141:       /*-----------------------------------------------------------------*/
                   2142:       case OP_HSPACE:
                   2143:       if (clen > 0) switch(c)
                   2144:         {
                   2145:         case 0x09:      /* HT */
                   2146:         case 0x20:      /* SPACE */
                   2147:         case 0xa0:      /* NBSP */
                   2148:         case 0x1680:    /* OGHAM SPACE MARK */
                   2149:         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2150:         case 0x2000:    /* EN QUAD */
                   2151:         case 0x2001:    /* EM QUAD */
                   2152:         case 0x2002:    /* EN SPACE */
                   2153:         case 0x2003:    /* EM SPACE */
                   2154:         case 0x2004:    /* THREE-PER-EM SPACE */
                   2155:         case 0x2005:    /* FOUR-PER-EM SPACE */
                   2156:         case 0x2006:    /* SIX-PER-EM SPACE */
                   2157:         case 0x2007:    /* FIGURE SPACE */
                   2158:         case 0x2008:    /* PUNCTUATION SPACE */
                   2159:         case 0x2009:    /* THIN SPACE */
                   2160:         case 0x200A:    /* HAIR SPACE */
                   2161:         case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2162:         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2163:         case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2164:         ADD_NEW(state_offset + 1, 0);
                   2165:         break;
                   2166:         }
                   2167:       break;
                   2168: 
                   2169:       /*-----------------------------------------------------------------*/
                   2170:       /* Match a negated single character casefully. This is only used for
                   2171:       one-byte characters, that is, we know that d < 256. The character we are
                   2172:       checking (c) can be multibyte. */
                   2173: 
                   2174:       case OP_NOT:
                   2175:       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2176:       break;
                   2177: 
                   2178:       /*-----------------------------------------------------------------*/
                   2179:       /* Match a negated single character caselessly. This is only used for
                   2180:       one-byte characters, that is, we know that d < 256. The character we are
                   2181:       checking (c) can be multibyte. */
                   2182: 
                   2183:       case OP_NOTI:
                   2184:       if (clen > 0 && c != d && c != fcc[d])
                   2185:         { ADD_NEW(state_offset + dlen + 1, 0); }
                   2186:       break;
                   2187: 
                   2188:       /*-----------------------------------------------------------------*/
                   2189:       case OP_PLUSI:
                   2190:       case OP_MINPLUSI:
                   2191:       case OP_POSPLUSI:
                   2192:       case OP_NOTPLUSI:
                   2193:       case OP_NOTMINPLUSI:
                   2194:       case OP_NOTPOSPLUSI:
                   2195:       caseless = TRUE;
                   2196:       codevalue -= OP_STARI - OP_STAR;
                   2197: 
                   2198:       /* Fall through */
                   2199:       case OP_PLUS:
                   2200:       case OP_MINPLUS:
                   2201:       case OP_POSPLUS:
                   2202:       case OP_NOTPLUS:
                   2203:       case OP_NOTMINPLUS:
                   2204:       case OP_NOTPOSPLUS:
                   2205:       count = current_state->count;  /* Already matched */
                   2206:       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
                   2207:       if (clen > 0)
                   2208:         {
                   2209:         unsigned int otherd = NOTACHAR;
                   2210:         if (caseless)
                   2211:           {
                   2212: #ifdef SUPPORT_UTF8
                   2213:           if (utf8 && d >= 128)
                   2214:             {
                   2215: #ifdef SUPPORT_UCP
                   2216:             otherd = UCD_OTHERCASE(d);
                   2217: #endif  /* SUPPORT_UCP */
                   2218:             }
                   2219:           else
                   2220: #endif  /* SUPPORT_UTF8 */
                   2221:           otherd = fcc[d];
                   2222:           }
                   2223:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2224:           {
                   2225:           if (count > 0 &&
                   2226:               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
                   2227:             {
                   2228:             active_count--;             /* Remove non-match possibility */
                   2229:             next_active_state--;
                   2230:             }
                   2231:           count++;
                   2232:           ADD_NEW(state_offset, count);
                   2233:           }
                   2234:         }
                   2235:       break;
                   2236: 
                   2237:       /*-----------------------------------------------------------------*/
                   2238:       case OP_QUERYI:
                   2239:       case OP_MINQUERYI:
                   2240:       case OP_POSQUERYI:
                   2241:       case OP_NOTQUERYI:
                   2242:       case OP_NOTMINQUERYI:
                   2243:       case OP_NOTPOSQUERYI:
                   2244:       caseless = TRUE;
                   2245:       codevalue -= OP_STARI - OP_STAR;
                   2246:       /* Fall through */
                   2247:       case OP_QUERY:
                   2248:       case OP_MINQUERY:
                   2249:       case OP_POSQUERY:
                   2250:       case OP_NOTQUERY:
                   2251:       case OP_NOTMINQUERY:
                   2252:       case OP_NOTPOSQUERY:
                   2253:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2254:       if (clen > 0)
                   2255:         {
                   2256:         unsigned int otherd = NOTACHAR;
                   2257:         if (caseless)
                   2258:           {
                   2259: #ifdef SUPPORT_UTF8
                   2260:           if (utf8 && d >= 128)
                   2261:             {
                   2262: #ifdef SUPPORT_UCP
                   2263:             otherd = UCD_OTHERCASE(d);
                   2264: #endif  /* SUPPORT_UCP */
                   2265:             }
                   2266:           else
                   2267: #endif  /* SUPPORT_UTF8 */
                   2268:           otherd = fcc[d];
                   2269:           }
                   2270:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2271:           {
                   2272:           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
                   2273:             {
                   2274:             active_count--;            /* Remove non-match possibility */
                   2275:             next_active_state--;
                   2276:             }
                   2277:           ADD_NEW(state_offset + dlen + 1, 0);
                   2278:           }
                   2279:         }
                   2280:       break;
                   2281: 
                   2282:       /*-----------------------------------------------------------------*/
                   2283:       case OP_STARI:
                   2284:       case OP_MINSTARI:
                   2285:       case OP_POSSTARI:
                   2286:       case OP_NOTSTARI:
                   2287:       case OP_NOTMINSTARI:
                   2288:       case OP_NOTPOSSTARI:
                   2289:       caseless = TRUE;
                   2290:       codevalue -= OP_STARI - OP_STAR;
                   2291:       /* Fall through */
                   2292:       case OP_STAR:
                   2293:       case OP_MINSTAR:
                   2294:       case OP_POSSTAR:
                   2295:       case OP_NOTSTAR:
                   2296:       case OP_NOTMINSTAR:
                   2297:       case OP_NOTPOSSTAR:
                   2298:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2299:       if (clen > 0)
                   2300:         {
                   2301:         unsigned int otherd = NOTACHAR;
                   2302:         if (caseless)
                   2303:           {
                   2304: #ifdef SUPPORT_UTF8
                   2305:           if (utf8 && d >= 128)
                   2306:             {
                   2307: #ifdef SUPPORT_UCP
                   2308:             otherd = UCD_OTHERCASE(d);
                   2309: #endif  /* SUPPORT_UCP */
                   2310:             }
                   2311:           else
                   2312: #endif  /* SUPPORT_UTF8 */
                   2313:           otherd = fcc[d];
                   2314:           }
                   2315:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2316:           {
                   2317:           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
                   2318:             {
                   2319:             active_count--;            /* Remove non-match possibility */
                   2320:             next_active_state--;
                   2321:             }
                   2322:           ADD_NEW(state_offset, 0);
                   2323:           }
                   2324:         }
                   2325:       break;
                   2326: 
                   2327:       /*-----------------------------------------------------------------*/
                   2328:       case OP_EXACTI:
                   2329:       case OP_NOTEXACTI:
                   2330:       caseless = TRUE;
                   2331:       codevalue -= OP_STARI - OP_STAR;
                   2332:       /* Fall through */
                   2333:       case OP_EXACT:
                   2334:       case OP_NOTEXACT:
                   2335:       count = current_state->count;  /* Number already matched */
                   2336:       if (clen > 0)
                   2337:         {
                   2338:         unsigned int otherd = NOTACHAR;
                   2339:         if (caseless)
                   2340:           {
                   2341: #ifdef SUPPORT_UTF8
                   2342:           if (utf8 && d >= 128)
                   2343:             {
                   2344: #ifdef SUPPORT_UCP
                   2345:             otherd = UCD_OTHERCASE(d);
                   2346: #endif  /* SUPPORT_UCP */
                   2347:             }
                   2348:           else
                   2349: #endif  /* SUPPORT_UTF8 */
                   2350:           otherd = fcc[d];
                   2351:           }
                   2352:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2353:           {
                   2354:           if (++count >= GET2(code, 1))
                   2355:             { ADD_NEW(state_offset + dlen + 3, 0); }
                   2356:           else
                   2357:             { ADD_NEW(state_offset, count); }
                   2358:           }
                   2359:         }
                   2360:       break;
                   2361: 
                   2362:       /*-----------------------------------------------------------------*/
                   2363:       case OP_UPTOI:
                   2364:       case OP_MINUPTOI:
                   2365:       case OP_POSUPTOI:
                   2366:       case OP_NOTUPTOI:
                   2367:       case OP_NOTMINUPTOI:
                   2368:       case OP_NOTPOSUPTOI:
                   2369:       caseless = TRUE;
                   2370:       codevalue -= OP_STARI - OP_STAR;
                   2371:       /* Fall through */
                   2372:       case OP_UPTO:
                   2373:       case OP_MINUPTO:
                   2374:       case OP_POSUPTO:
                   2375:       case OP_NOTUPTO:
                   2376:       case OP_NOTMINUPTO:
                   2377:       case OP_NOTPOSUPTO:
                   2378:       ADD_ACTIVE(state_offset + dlen + 3, 0);
                   2379:       count = current_state->count;  /* Number already matched */
                   2380:       if (clen > 0)
                   2381:         {
                   2382:         unsigned int otherd = NOTACHAR;
                   2383:         if (caseless)
                   2384:           {
                   2385: #ifdef SUPPORT_UTF8
                   2386:           if (utf8 && d >= 128)
                   2387:             {
                   2388: #ifdef SUPPORT_UCP
                   2389:             otherd = UCD_OTHERCASE(d);
                   2390: #endif  /* SUPPORT_UCP */
                   2391:             }
                   2392:           else
                   2393: #endif  /* SUPPORT_UTF8 */
                   2394:           otherd = fcc[d];
                   2395:           }
                   2396:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2397:           {
                   2398:           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
                   2399:             {
                   2400:             active_count--;             /* Remove non-match possibility */
                   2401:             next_active_state--;
                   2402:             }
                   2403:           if (++count >= GET2(code, 1))
                   2404:             { ADD_NEW(state_offset + dlen + 3, 0); }
                   2405:           else
                   2406:             { ADD_NEW(state_offset, count); }
                   2407:           }
                   2408:         }
                   2409:       break;
                   2410: 
                   2411: 
                   2412: /* ========================================================================== */
                   2413:       /* These are the class-handling opcodes */
                   2414: 
                   2415:       case OP_CLASS:
                   2416:       case OP_NCLASS:
                   2417:       case OP_XCLASS:
                   2418:         {
                   2419:         BOOL isinclass = FALSE;
                   2420:         int next_state_offset;
                   2421:         const uschar *ecode;
                   2422: 
                   2423:         /* For a simple class, there is always just a 32-byte table, and we
                   2424:         can set isinclass from it. */
                   2425: 
                   2426:         if (codevalue != OP_XCLASS)
                   2427:           {
                   2428:           ecode = code + 33;
                   2429:           if (clen > 0)
                   2430:             {
                   2431:             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
                   2432:               ((code[1 + c/8] & (1 << (c&7))) != 0);
                   2433:             }
                   2434:           }
                   2435: 
                   2436:         /* An extended class may have a table or a list of single characters,
                   2437:         ranges, or both, and it may be positive or negative. There's a
                   2438:         function that sorts all this out. */
                   2439: 
                   2440:         else
                   2441:          {
                   2442:          ecode = code + GET(code, 1);
                   2443:          if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
                   2444:          }
                   2445: 
                   2446:         /* At this point, isinclass is set for all kinds of class, and ecode
                   2447:         points to the byte after the end of the class. If there is a
                   2448:         quantifier, this is where it will be. */
                   2449: 
                   2450:         next_state_offset = (int)(ecode - start_code);
                   2451: 
                   2452:         switch (*ecode)
                   2453:           {
                   2454:           case OP_CRSTAR:
                   2455:           case OP_CRMINSTAR:
                   2456:           ADD_ACTIVE(next_state_offset + 1, 0);
                   2457:           if (isinclass) { ADD_NEW(state_offset, 0); }
                   2458:           break;
                   2459: 
                   2460:           case OP_CRPLUS:
                   2461:           case OP_CRMINPLUS:
                   2462:           count = current_state->count;  /* Already matched */
                   2463:           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
                   2464:           if (isinclass) { count++; ADD_NEW(state_offset, count); }
                   2465:           break;
                   2466: 
                   2467:           case OP_CRQUERY:
                   2468:           case OP_CRMINQUERY:
                   2469:           ADD_ACTIVE(next_state_offset + 1, 0);
                   2470:           if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
                   2471:           break;
                   2472: 
                   2473:           case OP_CRRANGE:
                   2474:           case OP_CRMINRANGE:
                   2475:           count = current_state->count;  /* Already matched */
                   2476:           if (count >= GET2(ecode, 1))
                   2477:             { ADD_ACTIVE(next_state_offset + 5, 0); }
                   2478:           if (isinclass)
                   2479:             {
                   2480:             int max = GET2(ecode, 3);
                   2481:             if (++count >= max && max != 0)   /* Max 0 => no limit */
                   2482:               { ADD_NEW(next_state_offset + 5, 0); }
                   2483:             else
                   2484:               { ADD_NEW(state_offset, count); }
                   2485:             }
                   2486:           break;
                   2487: 
                   2488:           default:
                   2489:           if (isinclass) { ADD_NEW(next_state_offset, 0); }
                   2490:           break;
                   2491:           }
                   2492:         }
                   2493:       break;
                   2494: 
                   2495: /* ========================================================================== */
                   2496:       /* These are the opcodes for fancy brackets of various kinds. We have
                   2497:       to use recursion in order to handle them. The "always failing" assertion
                   2498:       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
                   2499:       though the other "backtracking verbs" are not supported. */
                   2500: 
                   2501:       case OP_FAIL:
                   2502:       forced_fail++;    /* Count FAILs for multiple states */
                   2503:       break;
                   2504: 
                   2505:       case OP_ASSERT:
                   2506:       case OP_ASSERT_NOT:
                   2507:       case OP_ASSERTBACK:
                   2508:       case OP_ASSERTBACK_NOT:
                   2509:         {
                   2510:         int rc;
                   2511:         int local_offsets[2];
                   2512:         int local_workspace[1000];
                   2513:         const uschar *endasscode = code + GET(code, 1);
                   2514: 
                   2515:         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2516: 
                   2517:         rc = internal_dfa_exec(
                   2518:           md,                                   /* static match data */
                   2519:           code,                                 /* this subexpression's code */
                   2520:           ptr,                                  /* where we currently are */
                   2521:           (int)(ptr - start_subject),           /* start offset */
                   2522:           local_offsets,                        /* offset vector */
                   2523:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2524:           local_workspace,                      /* workspace vector */
                   2525:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2526:           rlevel);                              /* function recursion level */
                   2527: 
                   2528:         if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2529:         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
                   2530:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2531:         }
                   2532:       break;
                   2533: 
                   2534:       /*-----------------------------------------------------------------*/
                   2535:       case OP_COND:
                   2536:       case OP_SCOND:
                   2537:         {
                   2538:         int local_offsets[1000];
                   2539:         int local_workspace[1000];
                   2540:         int codelink = GET(code, 1);
                   2541:         int condcode;
                   2542: 
                   2543:         /* Because of the way auto-callout works during compile, a callout item
                   2544:         is inserted between OP_COND and an assertion condition. This does not
                   2545:         happen for the other conditions. */
                   2546: 
                   2547:         if (code[LINK_SIZE+1] == OP_CALLOUT)
                   2548:           {
                   2549:           rrc = 0;
                   2550:           if (pcre_callout != NULL)
                   2551:             {
                   2552:             pcre_callout_block cb;
                   2553:             cb.version          = 1;   /* Version 1 of the callout block */
                   2554:             cb.callout_number   = code[LINK_SIZE+2];
                   2555:             cb.offset_vector    = offsets;
                   2556:             cb.subject          = (PCRE_SPTR)start_subject;
                   2557:             cb.subject_length   = (int)(end_subject - start_subject);
                   2558:             cb.start_match      = (int)(current_subject - start_subject);
                   2559:             cb.current_position = (int)(ptr - start_subject);
                   2560:             cb.pattern_position = GET(code, LINK_SIZE + 3);
                   2561:             cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
                   2562:             cb.capture_top      = 1;
                   2563:             cb.capture_last     = -1;
                   2564:             cb.callout_data     = md->callout_data;
                   2565:             cb.mark             = NULL;   /* No (*MARK) support */
                   2566:             if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
                   2567:             }
                   2568:           if (rrc > 0) break;                      /* Fail this thread */
                   2569:           code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
                   2570:           }
                   2571: 
                   2572:         condcode = code[LINK_SIZE+1];
                   2573: 
                   2574:         /* Back reference conditions are not supported */
                   2575: 
                   2576:         if (condcode == OP_CREF || condcode == OP_NCREF)
                   2577:           return PCRE_ERROR_DFA_UCOND;
                   2578: 
                   2579:         /* The DEFINE condition is always false */
                   2580: 
                   2581:         if (condcode == OP_DEF)
                   2582:           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2583: 
                   2584:         /* The only supported version of OP_RREF is for the value RREF_ANY,
                   2585:         which means "test if in any recursion". We can't test for specifically
                   2586:         recursed groups. */
                   2587: 
                   2588:         else if (condcode == OP_RREF || condcode == OP_NRREF)
                   2589:           {
                   2590:           int value = GET2(code, LINK_SIZE+2);
                   2591:           if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
                   2592:           if (md->recursive != NULL)
                   2593:             { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
                   2594:           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2595:           }
                   2596: 
                   2597:         /* Otherwise, the condition is an assertion */
                   2598: 
                   2599:         else
                   2600:           {
                   2601:           int rc;
                   2602:           const uschar *asscode = code + LINK_SIZE + 1;
                   2603:           const uschar *endasscode = asscode + GET(asscode, 1);
                   2604: 
                   2605:           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2606: 
                   2607:           rc = internal_dfa_exec(
                   2608:             md,                                   /* fixed match data */
                   2609:             asscode,                              /* this subexpression's code */
                   2610:             ptr,                                  /* where we currently are */
                   2611:             (int)(ptr - start_subject),           /* start offset */
                   2612:             local_offsets,                        /* offset vector */
                   2613:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2614:             local_workspace,                      /* workspace vector */
                   2615:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2616:             rlevel);                              /* function recursion level */
                   2617: 
                   2618:           if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2619:           if ((rc >= 0) ==
                   2620:                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
                   2621:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2622:           else
                   2623:             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2624:           }
                   2625:         }
                   2626:       break;
                   2627: 
                   2628:       /*-----------------------------------------------------------------*/
                   2629:       case OP_RECURSE:
                   2630:         {
                   2631:         dfa_recursion_info *ri;
                   2632:         int local_offsets[1000];
                   2633:         int local_workspace[1000];
                   2634:         const uschar *callpat = start_code + GET(code, 1);
                   2635:         int recno = (callpat == md->start_code)? 0 :
                   2636:           GET2(callpat, 1 + LINK_SIZE);
                   2637:         int rc;
                   2638: 
                   2639:         DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
                   2640: 
                   2641:         /* Check for repeating a recursion without advancing the subject
                   2642:         pointer. This should catch convoluted mutual recursions. (Some simple
                   2643:         cases are caught at compile time.) */
                   2644: 
                   2645:         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   2646:           if (recno == ri->group_num && ptr == ri->subject_position)
                   2647:             return PCRE_ERROR_RECURSELOOP;
                   2648: 
                   2649:         /* Remember this recursion and where we started it so as to
                   2650:         catch infinite loops. */
                   2651: 
                   2652:         new_recursive.group_num = recno;
                   2653:         new_recursive.subject_position = ptr;
                   2654:         new_recursive.prevrec = md->recursive;
                   2655:         md->recursive = &new_recursive;
                   2656: 
                   2657:         rc = internal_dfa_exec(
                   2658:           md,                                   /* fixed match data */
                   2659:           callpat,                              /* this subexpression's code */
                   2660:           ptr,                                  /* where we currently are */
                   2661:           (int)(ptr - start_subject),           /* start offset */
                   2662:           local_offsets,                        /* offset vector */
                   2663:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2664:           local_workspace,                      /* workspace vector */
                   2665:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2666:           rlevel);                              /* function recursion level */
                   2667: 
                   2668:         md->recursive = new_recursive.prevrec;  /* Done this recursion */
                   2669: 
                   2670:         DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
                   2671:           rc));
                   2672: 
                   2673:         /* Ran out of internal offsets */
                   2674: 
                   2675:         if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
                   2676: 
                   2677:         /* For each successfully matched substring, set up the next state with a
                   2678:         count of characters to skip before trying it. Note that the count is in
                   2679:         characters, not bytes. */
                   2680: 
                   2681:         if (rc > 0)
                   2682:           {
                   2683:           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
                   2684:             {
                   2685:             const uschar *p = start_subject + local_offsets[rc];
                   2686:             const uschar *pp = start_subject + local_offsets[rc+1];
                   2687:             int charcount = local_offsets[rc+1] - local_offsets[rc];
                   2688:             while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
                   2689:             if (charcount > 0)
                   2690:               {
                   2691:               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
                   2692:               }
                   2693:             else
                   2694:               {
                   2695:               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
                   2696:               }
                   2697:             }
                   2698:           }
                   2699:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2700:         }
                   2701:       break;
                   2702: 
                   2703:       /*-----------------------------------------------------------------*/
                   2704:       case OP_BRAPOS:
                   2705:       case OP_SBRAPOS:
                   2706:       case OP_CBRAPOS:
                   2707:       case OP_SCBRAPOS:
                   2708:       case OP_BRAPOSZERO:
                   2709:         {
                   2710:         int charcount, matched_count;
                   2711:         const uschar *local_ptr = ptr;
                   2712:         BOOL allow_zero;
                   2713: 
                   2714:         if (codevalue == OP_BRAPOSZERO)
                   2715:           {
                   2716:           allow_zero = TRUE;
                   2717:           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
                   2718:           }
                   2719:         else allow_zero = FALSE;
                   2720: 
                   2721:         /* Loop to match the subpattern as many times as possible as if it were
                   2722:         a complete pattern. */
                   2723: 
                   2724:         for (matched_count = 0;; matched_count++)
                   2725:           {
                   2726:           int local_offsets[2];
                   2727:           int local_workspace[1000];
                   2728: 
                   2729:           int rc = internal_dfa_exec(
                   2730:             md,                                   /* fixed match data */
                   2731:             code,                                 /* this subexpression's code */
                   2732:             local_ptr,                            /* where we currently are */
                   2733:             (int)(ptr - start_subject),           /* start offset */
                   2734:             local_offsets,                        /* offset vector */
                   2735:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2736:             local_workspace,                      /* workspace vector */
                   2737:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2738:             rlevel);                              /* function recursion level */
                   2739: 
                   2740:           /* Failed to match */
                   2741: 
                   2742:           if (rc < 0)
                   2743:             {
                   2744:             if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2745:             break;
                   2746:             }
                   2747: 
                   2748:           /* Matched: break the loop if zero characters matched. */
                   2749: 
                   2750:           charcount = local_offsets[1] - local_offsets[0];
                   2751:           if (charcount == 0) break;
                   2752:           local_ptr += charcount;    /* Advance temporary position ptr */
                   2753:           }
                   2754: 
                   2755:         /* At this point we have matched the subpattern matched_count
                   2756:         times, and local_ptr is pointing to the character after the end of the
                   2757:         last match. */
                   2758: 
                   2759:         if (matched_count > 0 || allow_zero)
                   2760:           {
                   2761:           const uschar *end_subpattern = code;
                   2762:           int next_state_offset;
                   2763: 
                   2764:           do { end_subpattern += GET(end_subpattern, 1); }
                   2765:             while (*end_subpattern == OP_ALT);
                   2766:           next_state_offset =
                   2767:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2768: 
                   2769:           /* Optimization: if there are no more active states, and there
                   2770:           are no new states yet set up, then skip over the subject string
                   2771:           right here, to save looping. Otherwise, set up the new state to swing
                   2772:           into action when the end of the matched substring is reached. */
                   2773: 
                   2774:           if (i + 1 >= active_count && new_count == 0)
                   2775:             {
                   2776:             ptr = local_ptr;
                   2777:             clen = 0;
                   2778:             ADD_NEW(next_state_offset, 0);
                   2779:             }
                   2780:           else
                   2781:             {
                   2782:             const uschar *p = ptr;
                   2783:             const uschar *pp = local_ptr;
                   2784:             charcount = (int)(pp - p);
                   2785:             while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
                   2786:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2787:             }
                   2788:           }
                   2789:         }
                   2790:       break;
                   2791: 
                   2792:       /*-----------------------------------------------------------------*/
                   2793:       case OP_ONCE:
                   2794:       case OP_ONCE_NC:
                   2795:         {
                   2796:         int local_offsets[2];
                   2797:         int local_workspace[1000];
                   2798: 
                   2799:         int rc = internal_dfa_exec(
                   2800:           md,                                   /* fixed match data */
                   2801:           code,                                 /* this subexpression's code */
                   2802:           ptr,                                  /* where we currently are */
                   2803:           (int)(ptr - start_subject),           /* start offset */
                   2804:           local_offsets,                        /* offset vector */
                   2805:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2806:           local_workspace,                      /* workspace vector */
                   2807:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2808:           rlevel);                              /* function recursion level */
                   2809: 
                   2810:         if (rc >= 0)
                   2811:           {
                   2812:           const uschar *end_subpattern = code;
                   2813:           int charcount = local_offsets[1] - local_offsets[0];
                   2814:           int next_state_offset, repeat_state_offset;
                   2815: 
                   2816:           do { end_subpattern += GET(end_subpattern, 1); }
                   2817:             while (*end_subpattern == OP_ALT);
                   2818:           next_state_offset =
                   2819:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2820: 
                   2821:           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
                   2822:           arrange for the repeat state also to be added to the relevant list.
                   2823:           Calculate the offset, or set -1 for no repeat. */
                   2824: 
                   2825:           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
                   2826:                                  *end_subpattern == OP_KETRMIN)?
                   2827:             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
                   2828: 
                   2829:           /* If we have matched an empty string, add the next state at the
                   2830:           current character pointer. This is important so that the duplicate
                   2831:           checking kicks in, which is what breaks infinite loops that match an
                   2832:           empty string. */
                   2833: 
                   2834:           if (charcount == 0)
                   2835:             {
                   2836:             ADD_ACTIVE(next_state_offset, 0);
                   2837:             }
                   2838: 
                   2839:           /* Optimization: if there are no more active states, and there
                   2840:           are no new states yet set up, then skip over the subject string
                   2841:           right here, to save looping. Otherwise, set up the new state to swing
                   2842:           into action when the end of the matched substring is reached. */
                   2843: 
                   2844:           else if (i + 1 >= active_count && new_count == 0)
                   2845:             {
                   2846:             ptr += charcount;
                   2847:             clen = 0;
                   2848:             ADD_NEW(next_state_offset, 0);
                   2849: 
                   2850:             /* If we are adding a repeat state at the new character position,
                   2851:             we must fudge things so that it is the only current state.
                   2852:             Otherwise, it might be a duplicate of one we processed before, and
                   2853:             that would cause it to be skipped. */
                   2854: 
                   2855:             if (repeat_state_offset >= 0)
                   2856:               {
                   2857:               next_active_state = active_states;
                   2858:               active_count = 0;
                   2859:               i = -1;
                   2860:               ADD_ACTIVE(repeat_state_offset, 0);
                   2861:               }
                   2862:             }
                   2863:           else
                   2864:             {
                   2865:             const uschar *p = start_subject + local_offsets[0];
                   2866:             const uschar *pp = start_subject + local_offsets[1];
                   2867:             while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
                   2868:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2869:             if (repeat_state_offset >= 0)
                   2870:               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
                   2871:             }
                   2872:           }
                   2873:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2874:         }
                   2875:       break;
                   2876: 
                   2877: 
                   2878: /* ========================================================================== */
                   2879:       /* Handle callouts */
                   2880: 
                   2881:       case OP_CALLOUT:
                   2882:       rrc = 0;
                   2883:       if (pcre_callout != NULL)
                   2884:         {
                   2885:         pcre_callout_block cb;
                   2886:         cb.version          = 1;   /* Version 1 of the callout block */
                   2887:         cb.callout_number   = code[1];
                   2888:         cb.offset_vector    = offsets;
                   2889:         cb.subject          = (PCRE_SPTR)start_subject;
                   2890:         cb.subject_length   = (int)(end_subject - start_subject);
                   2891:         cb.start_match      = (int)(current_subject - start_subject);
                   2892:         cb.current_position = (int)(ptr - start_subject);
                   2893:         cb.pattern_position = GET(code, 2);
                   2894:         cb.next_item_length = GET(code, 2 + LINK_SIZE);
                   2895:         cb.capture_top      = 1;
                   2896:         cb.capture_last     = -1;
                   2897:         cb.callout_data     = md->callout_data;
                   2898:         cb.mark             = NULL;   /* No (*MARK) support */
                   2899:         if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
                   2900:         }
                   2901:       if (rrc == 0)
                   2902:         { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
                   2903:       break;
                   2904: 
                   2905: 
                   2906: /* ========================================================================== */
                   2907:       default:        /* Unsupported opcode */
                   2908:       return PCRE_ERROR_DFA_UITEM;
                   2909:       }
                   2910: 
                   2911:     NEXT_ACTIVE_STATE: continue;
                   2912: 
                   2913:     }      /* End of loop scanning active states */
                   2914: 
                   2915:   /* We have finished the processing at the current subject character. If no
                   2916:   new states have been set for the next character, we have found all the
                   2917:   matches that we are going to find. If we are at the top level and partial
                   2918:   matching has been requested, check for appropriate conditions.
                   2919: 
                   2920:   The "forced_fail" variable counts the number of (*F) encountered for the
                   2921:   character. If it is equal to the original active_count (saved in
                   2922:   workspace[1]) it means that (*F) was found on every active state. In this
                   2923:   case we don't want to give a partial match.
                   2924: 
                   2925:   The "could_continue" variable is true if a state could have continued but
                   2926:   for the fact that the end of the subject was reached. */
                   2927: 
                   2928:   if (new_count <= 0)
                   2929:     {
                   2930:     if (rlevel == 1 &&                               /* Top level, and */
                   2931:         could_continue &&                            /* Some could go on */
                   2932:         forced_fail != workspace[1] &&               /* Not all forced fail & */
                   2933:         (                                            /* either... */
                   2934:         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
                   2935:         ||                                           /* or... */
                   2936:         ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
                   2937:          match_count < 0)                            /* no matches */
                   2938:         ) &&                                         /* And... */
                   2939:         ptr >= end_subject &&                  /* Reached end of subject */
                   2940:         ptr > md->start_used_ptr)              /* Inspected non-empty string */
                   2941:       {
                   2942:       if (offsetcount >= 2)
                   2943:         {
                   2944:         offsets[0] = (int)(md->start_used_ptr - start_subject);
                   2945:         offsets[1] = (int)(end_subject - start_subject);
                   2946:         }
                   2947:       match_count = PCRE_ERROR_PARTIAL;
                   2948:       }
                   2949: 
                   2950:     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                   2951:       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
                   2952:       rlevel*2-2, SP));
                   2953:     break;        /* In effect, "return", but see the comment below */
                   2954:     }
                   2955: 
                   2956:   /* One or more states are active for the next character. */
                   2957: 
                   2958:   ptr += clen;    /* Advance to next subject character */
                   2959:   }               /* Loop to move along the subject string */
                   2960: 
                   2961: /* Control gets here from "break" a few lines above. We do it this way because
                   2962: if we use "return" above, we have compiler trouble. Some compilers warn if
                   2963: there's nothing here because they think the function doesn't return a value. On
                   2964: the other hand, if we put a dummy statement here, some more clever compilers
                   2965: complain that it can't be reached. Sigh. */
                   2966: 
                   2967: return match_count;
                   2968: }
                   2969: 
                   2970: 
                   2971: 
                   2972: 
                   2973: /*************************************************
                   2974: *    Execute a Regular Expression - DFA engine   *
                   2975: *************************************************/
                   2976: 
                   2977: /* This external function applies a compiled re to a subject string using a DFA
                   2978: engine. This function calls the internal function multiple times if the pattern
                   2979: is not anchored.
                   2980: 
                   2981: Arguments:
                   2982:   argument_re     points to the compiled expression
                   2983:   extra_data      points to extra data or is NULL
                   2984:   subject         points to the subject string
                   2985:   length          length of subject string (may contain binary zeros)
                   2986:   start_offset    where to start in the subject string
                   2987:   options         option bits
                   2988:   offsets         vector of match offsets
                   2989:   offsetcount     size of same
                   2990:   workspace       workspace vector
                   2991:   wscount         size of same
                   2992: 
                   2993: Returns:          > 0 => number of match offset pairs placed in offsets
                   2994:                   = 0 => offsets overflowed; longest matches are present
                   2995:                    -1 => failed to match
                   2996:                  < -1 => some kind of unexpected problem
                   2997: */
                   2998: 
                   2999: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3000: pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   3001:   const char *subject, int length, int start_offset, int options, int *offsets,
                   3002:   int offsetcount, int *workspace, int wscount)
                   3003: {
                   3004: real_pcre *re = (real_pcre *)argument_re;
                   3005: dfa_match_data match_block;
                   3006: dfa_match_data *md = &match_block;
                   3007: BOOL utf8, anchored, startline, firstline;
                   3008: const uschar *current_subject, *end_subject, *lcc;
                   3009: 
                   3010: pcre_study_data internal_study;
                   3011: const pcre_study_data *study = NULL;
                   3012: real_pcre internal_re;
                   3013: 
                   3014: const uschar *req_byte_ptr;
                   3015: const uschar *start_bits = NULL;
                   3016: BOOL first_byte_caseless = FALSE;
                   3017: BOOL req_byte_caseless = FALSE;
                   3018: int first_byte = -1;
                   3019: int req_byte = -1;
                   3020: int req_byte2 = -1;
                   3021: int newline;
                   3022: 
                   3023: /* Plausibility checks */
                   3024: 
                   3025: if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   3026: if (re == NULL || subject == NULL || workspace == NULL ||
                   3027:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   3028: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   3029: if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
                   3030: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   3031: 
                   3032: /* We need to find the pointer to any study data before we test for byte
                   3033: flipping, so we scan the extra_data block first. This may set two fields in the
                   3034: match block, so we must initialize them beforehand. However, the other fields
                   3035: in the match block must not be set until after the byte flipping. */
                   3036: 
                   3037: md->tables = re->tables;
                   3038: md->callout_data = NULL;
                   3039: 
                   3040: if (extra_data != NULL)
                   3041:   {
                   3042:   unsigned int flags = extra_data->flags;
                   3043:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   3044:     study = (const pcre_study_data *)extra_data->study_data;
                   3045:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
                   3046:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   3047:     return PCRE_ERROR_DFA_UMLIMIT;
                   3048:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   3049:     md->callout_data = extra_data->callout_data;
                   3050:   if ((flags & PCRE_EXTRA_TABLES) != 0)
                   3051:     md->tables = extra_data->tables;
                   3052:   }
                   3053: 
                   3054: /* Check that the first field in the block is the magic number. If it is not,
                   3055: test for a regex that was compiled on a host of opposite endianness. If this is
                   3056: the case, flipped values are put in internal_re and internal_study if there was
                   3057: study data too. */
                   3058: 
                   3059: if (re->magic_number != MAGIC_NUMBER)
                   3060:   {
                   3061:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   3062:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   3063:   if (study != NULL) study = &internal_study;
                   3064:   }
                   3065: 
                   3066: /* Set some local values */
                   3067: 
                   3068: current_subject = (const unsigned char *)subject + start_offset;
                   3069: end_subject = (const unsigned char *)subject + length;
                   3070: req_byte_ptr = current_subject - 1;
                   3071: 
                   3072: #ifdef SUPPORT_UTF8
                   3073: utf8 = (re->options & PCRE_UTF8) != 0;
                   3074: #else
                   3075: utf8 = FALSE;
                   3076: #endif
                   3077: 
                   3078: anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
                   3079:   (re->options & PCRE_ANCHORED) != 0;
                   3080: 
                   3081: /* The remaining fixed data for passing around. */
                   3082: 
                   3083: md->start_code = (const uschar *)argument_re +
                   3084:     re->name_table_offset + re->name_count * re->name_entry_size;
                   3085: md->start_subject = (const unsigned char *)subject;
                   3086: md->end_subject = end_subject;
                   3087: md->start_offset = start_offset;
                   3088: md->moptions = options;
                   3089: md->poptions = re->options;
                   3090: 
                   3091: /* If the BSR option is not set at match time, copy what was set
                   3092: at compile time. */
                   3093: 
                   3094: if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
                   3095:   {
                   3096:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   3097:     md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
                   3098: #ifdef BSR_ANYCRLF
                   3099:   else md->moptions |= PCRE_BSR_ANYCRLF;
                   3100: #endif
                   3101:   }
                   3102: 
                   3103: /* Handle different types of newline. The three bits give eight cases. If
                   3104: nothing is set at run time, whatever was used at compile time applies. */
                   3105: 
                   3106: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
                   3107:          PCRE_NEWLINE_BITS)
                   3108:   {
                   3109:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   3110:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   3111:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   3112:   case PCRE_NEWLINE_CR+
                   3113:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   3114:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   3115:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   3116:   default: return PCRE_ERROR_BADNEWLINE;
                   3117:   }
                   3118: 
                   3119: if (newline == -2)
                   3120:   {
                   3121:   md->nltype = NLTYPE_ANYCRLF;
                   3122:   }
                   3123: else if (newline < 0)
                   3124:   {
                   3125:   md->nltype = NLTYPE_ANY;
                   3126:   }
                   3127: else
                   3128:   {
                   3129:   md->nltype = NLTYPE_FIXED;
                   3130:   if (newline > 255)
                   3131:     {
                   3132:     md->nllen = 2;
                   3133:     md->nl[0] = (newline >> 8) & 255;
                   3134:     md->nl[1] = newline & 255;
                   3135:     }
                   3136:   else
                   3137:     {
                   3138:     md->nllen = 1;
                   3139:     md->nl[0] = newline;
                   3140:     }
                   3141:   }
                   3142: 
                   3143: /* Check a UTF-8 string if required. If it is invalid and offsetcount >= 2, the
                   3144: error offset and error code are passed back in offsets[0] and offsets[1]. */
                   3145: 
                   3146: #ifdef SUPPORT_UTF8
                   3147: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   3148:   {
                   3149:   int erroroffset;
                   3150:   int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);
                   3151:   if (errorcode != 0)
                   3152:     {
                   3153:     if (offsetcount >= 2)
                   3154:       {
                   3155:       offsets[0] = erroroffset;
                   3156:       offsets[1] = errorcode;
                   3157:       }
                   3158:     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
                   3159:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
                   3160:     }
                   3161:   if (start_offset > 0 && start_offset < length &&
                   3162:         (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
                   3163:     return PCRE_ERROR_BADUTF8_OFFSET;
                   3164:   }
                   3165: #endif
                   3166: 
                   3167: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   3168: is a feature that makes it possible to save compiled regex and re-use them
                   3169: in other programs later. */
                   3170: 
                   3171: if (md->tables == NULL) md->tables = _pcre_default_tables;
                   3172: 
                   3173: /* The lower casing table and the "must be at the start of a line" flag are
                   3174: used in a loop when finding where to start. */
                   3175: 
                   3176: lcc = md->tables + lcc_offset;
                   3177: startline = (re->flags & PCRE_STARTLINE) != 0;
                   3178: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   3179: 
                   3180: /* Set up the first character to match, if available. The first_byte value is
                   3181: never set for an anchored regular expression, but the anchoring may be forced
                   3182: at run time, so we have to test for anchoring. The first char may be unset for
                   3183: an unanchored pattern, of course. If there's no first char and the pattern was
                   3184: studied, there may be a bitmap of possible first characters. */
                   3185: 
                   3186: if (!anchored)
                   3187:   {
                   3188:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   3189:     {
                   3190:     first_byte = re->first_byte & 255;
                   3191:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   3192:       first_byte = lcc[first_byte];
                   3193:     }
                   3194:   else
                   3195:     {
                   3196:     if (!startline && study != NULL &&
                   3197:          (study->flags & PCRE_STUDY_MAPPED) != 0)
                   3198:       start_bits = study->start_bits;
                   3199:     }
                   3200:   }
                   3201: 
                   3202: /* For anchored or unanchored matches, there may be a "last known required
                   3203: character" set. */
                   3204: 
                   3205: if ((re->flags & PCRE_REQCHSET) != 0)
                   3206:   {
                   3207:   req_byte = re->req_byte & 255;
                   3208:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   3209:   req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
                   3210:   }
                   3211: 
                   3212: /* Call the main matching function, looping for a non-anchored regex after a
                   3213: failed match. If not restarting, perform certain optimizations at the start of
                   3214: a match. */
                   3215: 
                   3216: for (;;)
                   3217:   {
                   3218:   int rc;
                   3219: 
                   3220:   if ((options & PCRE_DFA_RESTART) == 0)
                   3221:     {
                   3222:     const uschar *save_end_subject = end_subject;
                   3223: 
                   3224:     /* If firstline is TRUE, the start of the match is constrained to the first
                   3225:     line of a multiline string. Implement this by temporarily adjusting
                   3226:     end_subject so that we stop scanning at a newline. If the match fails at
                   3227:     the newline, later code breaks this loop. */
                   3228: 
                   3229:     if (firstline)
                   3230:       {
                   3231:       USPTR t = current_subject;
                   3232: #ifdef SUPPORT_UTF8
                   3233:       if (utf8)
                   3234:         {
                   3235:         while (t < md->end_subject && !IS_NEWLINE(t))
                   3236:           {
                   3237:           t++;
                   3238:           while (t < end_subject && (*t & 0xc0) == 0x80) t++;
                   3239:           }
                   3240:         }
                   3241:       else
                   3242: #endif
                   3243:       while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   3244:       end_subject = t;
                   3245:       }
                   3246: 
                   3247:     /* There are some optimizations that avoid running the match if a known
                   3248:     starting point is not found. However, there is an option that disables
                   3249:     these, for testing and for ensuring that all callouts do actually occur.
                   3250:     The option can be set in the regex by (*NO_START_OPT) or passed in
                   3251:     match-time options. */
                   3252: 
                   3253:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   3254:       {
                   3255:       /* Advance to a known first byte. */
                   3256: 
                   3257:       if (first_byte >= 0)
                   3258:         {
                   3259:         if (first_byte_caseless)
                   3260:           while (current_subject < end_subject &&
                   3261:                  lcc[*current_subject] != first_byte)
                   3262:             current_subject++;
                   3263:         else
                   3264:           while (current_subject < end_subject &&
                   3265:                  *current_subject != first_byte)
                   3266:             current_subject++;
                   3267:         }
                   3268: 
                   3269:       /* Or to just after a linebreak for a multiline match if possible */
                   3270: 
                   3271:       else if (startline)
                   3272:         {
                   3273:         if (current_subject > md->start_subject + start_offset)
                   3274:           {
                   3275: #ifdef SUPPORT_UTF8
                   3276:           if (utf8)
                   3277:             {
                   3278:             while (current_subject < end_subject &&
                   3279:                    !WAS_NEWLINE(current_subject))
                   3280:               {
                   3281:               current_subject++;
                   3282:               while(current_subject < end_subject &&
                   3283:                     (*current_subject & 0xc0) == 0x80)
                   3284:                 current_subject++;
                   3285:               }
                   3286:             }
                   3287:           else
                   3288: #endif
                   3289:           while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
                   3290:             current_subject++;
                   3291: 
                   3292:           /* If we have just passed a CR and the newline option is ANY or
                   3293:           ANYCRLF, and we are now at a LF, advance the match position by one
                   3294:           more character. */
                   3295: 
                   3296:           if (current_subject[-1] == CHAR_CR &&
                   3297:                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   3298:                current_subject < end_subject &&
                   3299:                *current_subject == CHAR_NL)
                   3300:             current_subject++;
                   3301:           }
                   3302:         }
                   3303: 
                   3304:       /* Or to a non-unique first char after study */
                   3305: 
                   3306:       else if (start_bits != NULL)
                   3307:         {
                   3308:         while (current_subject < end_subject)
                   3309:           {
                   3310:           register unsigned int c = *current_subject;
                   3311:           if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   3312:             {
                   3313:             current_subject++;
                   3314: #ifdef SUPPORT_UTF8
                   3315:             if (utf8)
                   3316:               while(current_subject < end_subject &&
                   3317:                     (*current_subject & 0xc0) == 0x80) current_subject++;
                   3318: #endif
                   3319:             }
                   3320:           else break;
                   3321:           }
                   3322:         }
                   3323:       }
                   3324: 
                   3325:     /* Restore fudged end_subject */
                   3326: 
                   3327:     end_subject = save_end_subject;
                   3328: 
                   3329:     /* The following two optimizations are disabled for partial matching or if
                   3330:     disabling is explicitly requested (and of course, by the test above, this
                   3331:     code is not obeyed when restarting after a partial match). */
                   3332: 
                   3333:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
                   3334:         (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
                   3335:       {
                   3336:       /* If the pattern was studied, a minimum subject length may be set. This
                   3337:       is a lower bound; no actual string of that length may actually match the
                   3338:       pattern. Although the value is, strictly, in characters, we treat it as
                   3339:       bytes to avoid spending too much time in this optimization. */
                   3340: 
                   3341:       if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   3342:           (pcre_uint32)(end_subject - current_subject) < study->minlength)
                   3343:         return PCRE_ERROR_NOMATCH;
                   3344: 
                   3345:       /* If req_byte is set, we know that that character must appear in the
                   3346:       subject for the match to succeed. If the first character is set, req_byte
                   3347:       must be later in the subject; otherwise the test starts at the match
                   3348:       point. This optimization can save a huge amount of work in patterns with
                   3349:       nested unlimited repeats that aren't going to match. Writing separate
                   3350:       code for cased/caseless versions makes it go faster, as does using an
                   3351:       autoincrement and backing off on a match.
                   3352: 
                   3353:       HOWEVER: when the subject string is very, very long, searching to its end
                   3354:       can take a long time, and give bad performance on quite ordinary
                   3355:       patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
                   3356:       string... so we don't do this when the string is sufficiently long. */
                   3357: 
                   3358:       if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
                   3359:         {
                   3360:         register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
                   3361: 
                   3362:         /* We don't need to repeat the search if we haven't yet reached the
                   3363:         place we found it at last time. */
                   3364: 
                   3365:         if (p > req_byte_ptr)
                   3366:           {
                   3367:           if (req_byte_caseless)
                   3368:             {
                   3369:             while (p < end_subject)
                   3370:               {
                   3371:               register int pp = *p++;
                   3372:               if (pp == req_byte || pp == req_byte2) { p--; break; }
                   3373:               }
                   3374:             }
                   3375:           else
                   3376:             {
                   3377:             while (p < end_subject)
                   3378:               {
                   3379:               if (*p++ == req_byte) { p--; break; }
                   3380:               }
                   3381:             }
                   3382: 
                   3383:           /* If we can't find the required character, break the matching loop,
                   3384:           which will cause a return of PCRE_ERROR_NOMATCH. */
                   3385: 
                   3386:           if (p >= end_subject) break;
                   3387: 
                   3388:           /* If we have found the required character, save the point where we
                   3389:           found it, so that we don't search again next time round the loop if
                   3390:           the start hasn't passed this character yet. */
                   3391: 
                   3392:           req_byte_ptr = p;
                   3393:           }
                   3394:         }
                   3395:       }
                   3396:     }   /* End of optimizations that are done when not restarting */
                   3397: 
                   3398:   /* OK, now we can do the business */
                   3399: 
                   3400:   md->start_used_ptr = current_subject;
                   3401:   md->recursive = NULL;
                   3402: 
                   3403:   rc = internal_dfa_exec(
                   3404:     md,                                /* fixed match data */
                   3405:     md->start_code,                    /* this subexpression's code */
                   3406:     current_subject,                   /* where we currently are */
                   3407:     start_offset,                      /* start offset in subject */
                   3408:     offsets,                           /* offset vector */
                   3409:     offsetcount,                       /* size of same */
                   3410:     workspace,                         /* workspace vector */
                   3411:     wscount,                           /* size of same */
                   3412:     0);                                /* function recurse level */
                   3413: 
                   3414:   /* Anything other than "no match" means we are done, always; otherwise, carry
                   3415:   on only if not anchored. */
                   3416: 
                   3417:   if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
                   3418: 
                   3419:   /* Advance to the next subject character unless we are at the end of a line
                   3420:   and firstline is set. */
                   3421: 
                   3422:   if (firstline && IS_NEWLINE(current_subject)) break;
                   3423:   current_subject++;
                   3424:   if (utf8)
                   3425:     {
                   3426:     while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
                   3427:       current_subject++;
                   3428:     }
                   3429:   if (current_subject > end_subject) break;
                   3430: 
                   3431:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   3432:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   3433:   or ANY or ANYCRLF, advance the match position by one more character. */
                   3434: 
                   3435:   if (current_subject[-1] == CHAR_CR &&
                   3436:       current_subject < end_subject &&
                   3437:       *current_subject == CHAR_NL &&
                   3438:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   3439:         (md->nltype == NLTYPE_ANY ||
                   3440:          md->nltype == NLTYPE_ANYCRLF ||
                   3441:          md->nllen == 2))
                   3442:     current_subject++;
                   3443: 
                   3444:   }   /* "Bumpalong" loop */
                   3445: 
                   3446: return PCRE_ERROR_NOMATCH;
                   3447: }
                   3448: 
                   3449: /* End of pcre_dfa_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>