File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_dfa_exec.c
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:04 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: /* PCRE is a library of functions to support regular expressions whose syntax
    6: and semantics are as close as possible to those of the Perl 5 language (but see
    7: below for why this module is different).
    8: 
    9:                        Written by Philip Hazel
   10:            Copyright (c) 1997-2013 University of Cambridge
   11: 
   12: -----------------------------------------------------------------------------
   13: Redistribution and use in source and binary forms, with or without
   14: modification, are permitted provided that the following conditions are met:
   15: 
   16:     * Redistributions of source code must retain the above copyright notice,
   17:       this list of conditions and the following disclaimer.
   18: 
   19:     * Redistributions in binary form must reproduce the above copyright
   20:       notice, this list of conditions and the following disclaimer in the
   21:       documentation and/or other materials provided with the distribution.
   22: 
   23:     * Neither the name of the University of Cambridge nor the names of its
   24:       contributors may be used to endorse or promote products derived from
   25:       this software without specific prior written permission.
   26: 
   27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37: POSSIBILITY OF SUCH DAMAGE.
   38: -----------------------------------------------------------------------------
   39: */
   40: 
   41: /* This module contains the external function pcre_dfa_exec(), which is an
   42: alternative matching function that uses a sort of DFA algorithm (not a true
   43: FSM). This is NOT Perl-compatible, but it has advantages in certain
   44: applications. */
   45: 
   46: 
   47: /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
   48: the performance of his patterns greatly. I could not use it as it stood, as it
   49: was not thread safe, and made assumptions about pattern sizes. Also, it caused
   50: test 7 to loop, and test 9 to crash with a segfault.
   51: 
   52: The issue is the check for duplicate states, which is done by a simple linear
   53: search up the state list. (Grep for "duplicate" below to find the code.) For
   54: many patterns, there will never be many states active at one time, so a simple
   55: linear search is fine. In patterns that have many active states, it might be a
   56: bottleneck. The suggested code used an indexing scheme to remember which states
   57: had previously been used for each character, and avoided the linear search when
   58: it knew there was no chance of a duplicate. This was implemented when adding
   59: states to the state lists.
   60: 
   61: I wrote some thread-safe, not-limited code to try something similar at the time
   62: of checking for duplicates (instead of when adding states), using index vectors
   63: on the stack. It did give a 13% improvement with one specially constructed
   64: pattern for certain subject strings, but on other strings and on many of the
   65: simpler patterns in the test suite it did worse. The major problem, I think,
   66: was the extra time to initialize the index. This had to be done for each call
   67: of internal_dfa_exec(). (The supplied patch used a static vector, initialized
   68: only once - I suspect this was the cause of the problems with the tests.)
   69: 
   70: Overall, I concluded that the gains in some cases did not outweigh the losses
   71: in others, so I abandoned this code. */
   72: 
   73: 
   74: 
   75: #ifdef HAVE_CONFIG_H
   76: #include "config.h"
   77: #endif
   78: 
   79: #define NLBLOCK md             /* Block containing newline information */
   80: #define PSSTART start_subject  /* Field containing processed string start */
   81: #define PSEND   end_subject    /* Field containing processed string end */
   82: 
   83: #include "pcre_internal.h"
   84: 
   85: 
   86: /* For use to indent debugging output */
   87: 
   88: #define SP "                   "
   89: 
   90: 
   91: /*************************************************
   92: *      Code parameters and static tables         *
   93: *************************************************/
   94: 
   95: /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
   96: into others, under special conditions. A gap of 20 between the blocks should be
   97: enough. The resulting opcodes don't have to be less than 256 because they are
   98: never stored, so we push them well clear of the normal opcodes. */
   99: 
  100: #define OP_PROP_EXTRA       300  /* presumably for \p and \P items - confirm at use sites */
  101: #define OP_EXTUNI_EXTRA     320  /* presumably for \X - confirm at use sites */
  102: #define OP_ANYNL_EXTRA      340  /* presumably for \R - confirm at use sites */
  103: #define OP_HSPACE_EXTRA     360  /* presumably for \H and \h - confirm at use sites */
  104: #define OP_VSPACE_EXTRA     380  /* presumably for \V and \v - confirm at use sites */
       /* All five bases are above 255 and are spaced exactly 20 apart. */
  105: 
  106: 
  107: /* This table identifies those opcodes that are followed immediately by a
  108: character that is to be tested in some way. This makes it possible to
  109: centralize the loading of these characters. In the case of Type * etc, the
  110: "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
  111: small value. Non-zero values in the table are the offsets from the opcode where
  112: the character is to be found. ***NOTE*** If the start of this table is
  113: modified, the three tables that follow must also be modified. */
  114: 
  115: static const pcre_uint8 coptable[] = {
       /* One entry per opcode; each row's trailing comment names the opcodes it
       covers. 0 means no character is loaded centrally for that opcode; a
       non-zero entry is the offset from the opcode to its character. Entries of
       1+IMM2_SIZE are used for the upto/minupto/exact/upto+ forms, presumably
       because an IMM2_SIZE-wide repeat count sits between the opcode and the
       character - confirm against the opcode layout in pcre_internal.h. */
  116:   0,                             /* End                                    */
  117:   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
  118:   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
  119:   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
  120:   0, 0,                          /* \P, \p                                 */
  121:   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
  122:   0,                             /* \X                                     */
  123:   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
  124:   1,                             /* Char                                   */
  125:   1,                             /* Chari                                  */
  126:   1,                             /* not                                    */
  127:   1,                             /* noti                                   */
  128:   /* Positive single-char repeats                                          */
  129:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  130:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
  131:   1+IMM2_SIZE,                   /* exact                                  */
  132:   1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
  133:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
  134:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
  135:   1+IMM2_SIZE,                   /* exact I                                */
  136:   1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
  137:   /* Negative single-char repeats - only for chars < 256                   */
  138:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
  139:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
  140:   1+IMM2_SIZE,                   /* NOT exact                              */
  141:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
  142:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
  143:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
  144:   1+IMM2_SIZE,                   /* NOT exact I                            */
  145:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
  146:   /* Positive type repeats                                                 */
  147:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
  148:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
  149:   1+IMM2_SIZE,                   /* Type exact                             */
  150:   1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
  151:   /* Character class & ref repeats                                         */
  152:   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
  153:   0, 0,                          /* CRRANGE, CRMINRANGE                    */
  154:   0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
  155:   0,                             /* CLASS                                  */
  156:   0,                             /* NCLASS                                 */
  157:   0,                             /* XCLASS - variable length               */
  158:   0,                             /* REF                                    */
  159:   0,                             /* REFI                                   */
  160:   0,                             /* DNREF                                  */
  161:   0,                             /* DNREFI                                 */
  162:   0,                             /* RECURSE                                */
  163:   0,                             /* CALLOUT                                */
  164:   0,                             /* Alt                                    */
  165:   0,                             /* Ket                                    */
  166:   0,                             /* KetRmax                                */
  167:   0,                             /* KetRmin                                */
  168:   0,                             /* KetRpos                                */
  169:   0,                             /* Reverse                                */
  170:   0,                             /* Assert                                 */
  171:   0,                             /* Assert not                             */
  172:   0,                             /* Assert behind                          */
  173:   0,                             /* Assert behind not                      */
  174:   0, 0,                          /* ONCE, ONCE_NC                          */
  175:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
  176:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
  177:   0, 0,                          /* CREF, DNCREF                           */
  178:   0, 0,                          /* RREF, DNRREF                           */
  179:   0,                             /* DEF                                    */
  180:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
  181:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
  182:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  183:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
  184:   0, 0                           /* CLOSE, SKIPZERO                        */
  185: };
  186: 
  187: /* This table identifies those opcodes that inspect a character. It is used to
  188: remember the fact that a character could have been inspected when the end of
  189: the subject is reached. ***NOTE*** If the start of this table is modified, the
  190: two tables that follow must also be modified. */
  191: 
  192: static const pcre_uint8 poptable[] = {
       /* One entry per opcode, covering the same opcode sequence as coptable
       (see the trailing comment on each row). 1 marks an opcode that inspects a
       subject character; 0 marks one that does not. */
  193:   0,                             /* End                                    */
  194:   0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
  195:   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
  196:   1, 1, 1,                       /* Any, AllAny, Anybyte                   */
  197:   1, 1,                          /* \P, \p                                 */
  198:   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
  199:   1,                             /* \X                                     */
  200:   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
  201:   1,                             /* Char                                   */
  202:   1,                             /* Chari                                  */
  203:   1,                             /* not                                    */
  204:   1,                             /* noti                                   */
  205:   /* Positive single-char repeats                                          */
  206:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  207:   1, 1, 1,                       /* upto, minupto, exact                   */
  208:   1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
  209:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
  210:   1, 1, 1,                       /* upto I, minupto I, exact I             */
  211:   1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
  212:   /* Negative single-char repeats - only for chars < 256                   */
  213:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
  214:   1, 1, 1,                       /* NOT upto, minupto, exact               */
  215:   1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
  216:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
  217:   1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
  218:   1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
  219:   /* Positive type repeats                                                 */
  220:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
  221:   1, 1, 1,                       /* Type upto, minupto, exact              */
  222:   1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
  223:   /* Character class & ref repeats                                         */
  224:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  225:   1, 1,                          /* CRRANGE, CRMINRANGE                    */
  226:   1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
  227:   1,                             /* CLASS                                  */
  228:   1,                             /* NCLASS                                 */
  229:   1,                             /* XCLASS - variable length               */
  230:   0,                             /* REF                                    */
  231:   0,                             /* REFI                                   */
  232:   0,                             /* DNREF                                  */
  233:   0,                             /* DNREFI                                 */
  234:   0,                             /* RECURSE                                */
  235:   0,                             /* CALLOUT                                */
  236:   0,                             /* Alt                                    */
  237:   0,                             /* Ket                                    */
  238:   0,                             /* KetRmax                                */
  239:   0,                             /* KetRmin                                */
  240:   0,                             /* KetRpos                                */
  241:   0,                             /* Reverse                                */
  242:   0,                             /* Assert                                 */
  243:   0,                             /* Assert not                             */
  244:   0,                             /* Assert behind                          */
  245:   0,                             /* Assert behind not                      */
  246:   0, 0,                          /* ONCE, ONCE_NC                          */
  247:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
  248:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
  249:   0, 0,                          /* CREF, DNCREF                           */
  250:   0, 0,                          /* RREF, DNRREF                           */
  251:   0,                             /* DEF                                    */
  252:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
  253:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
  254:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  255:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
  256:   0, 0                           /* CLOSE, SKIPZERO                        */
  257: };
  258: 
  259: /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
  260: and \w */
  261: 
  262: static const pcre_uint8 toptable1[] = {
       /* Positional layout: six zero entries, then ctype_digit, ctype_space and
       ctype_word each twice, then two zeros labelled OP_ANY and OP_ALLANY. This
       lines up with the first 14 entries of coptable (End, five assertions,
       \D \d \S \s \W \w, Any, AllAny), so the index is presumably the opcode
       value - confirm at the use sites, which are not in this part of the file. */
  263:   0, 0, 0, 0, 0, 0,
  264:   ctype_digit, ctype_digit,
  265:   ctype_space, ctype_space,
  266:   ctype_word,  ctype_word,
  267:   0, 0                            /* OP_ANY, OP_ALLANY */
  268: };
  269: 
  270: static const pcre_uint8 toptable2[] = {
       /* Same positional layout as toptable1, but only the first entry of each
       digit/space/word pair is non-zero (the position that corresponds to \D,
       \S and \W in coptable's ordering), and the OP_ANY/OP_ALLANY entries are 1.
       NOTE(review): presumably combined with toptable1 as a mask-then-invert
       pair - confirm at the use sites, which are not in this part of the file. */
  271:   0, 0, 0, 0, 0, 0,
  272:   ctype_digit, 0,
  273:   ctype_space, 0,
  274:   ctype_word,  0,
  275:   1, 1                            /* OP_ANY, OP_ALLANY */
  276: };
  277: 
  278: 
  279: /* Structure for holding data about a particular state, which is in effect the
  280: current data for an active path through the match tree. It must consist
  281: entirely of ints because the working vector we are passed, and which we put
  282: these structures in, is a vector of ints. */
  283: 
  284: typedef struct stateblock {
       /* One active match state. Every member is a plain int so that states can
       be stored directly in the int workspace vector. */
  285:   int offset;                     /* Offset to opcode from start_code; negative = held-off state */
  286:   int count;                      /* Count for repeats */
  287:   int data;                       /* Some use extra data; for a held-off state, characters still to skip */
  288: } stateblock;
  289: 
       /* Size of one stateblock measured in ints. internal_dfa_exec divides the
       usable workspace size by twice this value to get the capacity of each of
       its two state lists. */
  290: #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
  291: 
  292: 
  293: #ifdef PCRE_DEBUG
  294: /*************************************************
  295: *             Print character string             *
  296: *************************************************/
  297: 
  298: /* Character string printing function for debugging.
  299: 
  300: Arguments:
  301:   p            points to string
  302:   length       number of pcre_uchar units to print
  303:   f            where to print
  304: 
  305: Returns:       nothing
  306: */
  307: 
  308: static void
  309: pchars(const pcre_uchar *p, int length, FILE *f)
  310: {
       /* Debug helper: writes `length` items starting at p to f. An item that
       isprint() accepts is written as itself; anything else is written as a
       \x{..} hex escape. Returns nothing. */
  311: pcre_uint32 c;
  312: while (length-- > 0)
  313:   {
       /* isprint() is only defined for arguments representable as unsigned char
       (or EOF). c is loaded from a pcre_uchar, which may be wider than 8 bits in
       some builds (NOTE(review): confirm for the 16/32-bit configurations), so
       range-check before calling it; larger values take the hex-escape path.
       The casts make the arguments match what %c (int) and %x (unsigned int)
       expect. */
  314:   if ((c = *(p++)) <= 255 && isprint((int)c))
  315:     fprintf(f, "%c", (int)c);
  316:   else
  317:     fprintf(f, "\\x{%02x}", (unsigned int)c);
  318:   }
  319: }
  320: #endif
  321: 
  322: 
  323: 
  324: /*************************************************
  325: *    Execute a Regular Expression - DFA engine   *
  326: *************************************************/
  327: 
  328: /* This internal function applies a compiled pattern to a subject string,
  329: starting at a given point, using a DFA engine. This function is called from the
  330: external one, possibly multiple times if the pattern is not anchored. The
  331: function calls itself recursively for some kinds of subpattern.
  332: 
  333: Arguments:
  334:   md                the match_data block with fixed information
  335:   this_start_code   the opening bracket of this subexpression's code
  336:   current_subject   where we currently are in the subject string
  337:   start_offset      start offset in the subject string
  338:   offsets           vector to contain the matching string offsets
  339:   offsetcount       size of same
  340:   workspace         vector of workspace
  341:   wscount           size of same
  342:   rlevel            function call recursion level
  343: 
  344: Returns:            > 0 => number of match offset pairs placed in offsets
  345:                     = 0 => offsets overflowed; longest matches are present
  346:                      -1 => failed to match
  347:                    < -1 => some kind of unexpected problem
  348: 
  349: The following macros are used for adding states to the two state vectors (one
  350: for the current character, one for the following character). */
  351: 
  352: #define ADD_ACTIVE(x,y) \
  353:   if (active_count++ < wscount) \
  354:     { \
  355:     next_active_state->offset = (x); \
  356:     next_active_state->count  = (y); \
  357:     next_active_state++; \
  358:     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
  359:     } \
  360:   else return PCRE_ERROR_DFA_WSSIZE
  361: 
       /* Shared notes for ADD_ACTIVE (above) and ADD_ACTIVE_DATA, ADD_NEW and
       ADD_NEW_DATA (below).
       Each macro appends one state through a cursor pointer and bumps a counter:
       the ADD_ACTIVE pair uses next_active_state/active_count, the ADD_NEW pair
       uses next_new_state/new_count. x is stored in the offset field, y in count
       and, for the _DATA variants only, z in data; the two-argument forms leave
       the data field untouched.
       The macros are not self-contained: they rely on active_count, new_count,
       wscount, next_active_state, next_new_state and rlevel being variables in
       the expanding function, and when the counter has already reached wscount
       they execute "return PCRE_ERROR_DFA_WSSIZE" in that function. The counter
       is post-incremented on that failing path as well.
       Each expansion is an if/else statement that is completed by the semicolon
       written after the macro call. x, y and z also appear inside DPRINTF
       (presumably a debug-only trace - confirm in pcre_internal.h), so arguments
       should be free of side effects. */
  362: #define ADD_ACTIVE_DATA(x,y,z) \
  363:   if (active_count++ < wscount) \
  364:     { \
  365:     next_active_state->offset = (x); \
  366:     next_active_state->count  = (y); \
  367:     next_active_state->data   = (z); \
  368:     next_active_state++; \
  369:     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
  370:     } \
  371:   else return PCRE_ERROR_DFA_WSSIZE
  372: 
       /* Append a state (offset x, count y) to the new-state list. */
  373: #define ADD_NEW(x,y) \
  374:   if (new_count++ < wscount) \
  375:     { \
  376:     next_new_state->offset = (x); \
  377:     next_new_state->count  = (y); \
  378:     next_new_state++; \
  379:     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
  380:     } \
  381:   else return PCRE_ERROR_DFA_WSSIZE
  382: 
       /* As ADD_NEW, but also stores z in the data field; the debug trace for
       this variant additionally reports __LINE__. */
  383: #define ADD_NEW_DATA(x,y,z) \
  384:   if (new_count++ < wscount) \
  385:     { \
  386:     next_new_state->offset = (x); \
  387:     next_new_state->count  = (y); \
  388:     next_new_state->data   = (z); \
  389:     next_new_state++; \
  390:     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
  391:       (x), (y), (z), __LINE__)); \
  392:     } \
  393:   else return PCRE_ERROR_DFA_WSSIZE
  394: 
  395: /* And now, here is the code */
  396: 
  397: static int
  398: internal_dfa_exec(
  399:   dfa_match_data *md,
  400:   const pcre_uchar *this_start_code,
  401:   const pcre_uchar *current_subject,
  402:   int start_offset,
  403:   int *offsets,
  404:   int offsetcount,
  405:   int *workspace,
  406:   int wscount,
  407:   int  rlevel)
  408: {
  409: stateblock *active_states, *new_states, *temp_states;
  410: stateblock *next_active_state, *next_new_state;
  411: 
  412: const pcre_uint8 *ctypes, *lcc, *fcc;
  413: const pcre_uchar *ptr;
  414: const pcre_uchar *end_code, *first_op;
  415: 
  416: dfa_recursion_info new_recursive;
  417: 
  418: int active_count, new_count, match_count;
  419: 
  420: /* Some fields in the md block are frequently referenced, so we load them into
  421: independent variables in the hope that this will perform better. */
  422: 
  423: const pcre_uchar *start_subject = md->start_subject;
  424: const pcre_uchar *end_subject = md->end_subject;
  425: const pcre_uchar *start_code = md->start_code;
  426: 
  427: #ifdef SUPPORT_UTF
  428: BOOL utf = (md->poptions & PCRE_UTF8) != 0;
  429: #else
  430: BOOL utf = FALSE;
  431: #endif
  432: 
  433: BOOL reset_could_continue = FALSE;
  434: 
  435: rlevel++;
  436: offsetcount &= (-2);
  437: 
  438: wscount -= 2;
  439: wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
  440:           (2 * INTS_PER_STATEBLOCK);
  441: 
  442: DPRINTF(("\n%.*s---------------------\n"
  443:   "%.*sCall to internal_dfa_exec f=%d\n",
  444:   rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
  445: 
  446: ctypes = md->tables + ctypes_offset;
  447: lcc = md->tables + lcc_offset;
  448: fcc = md->tables + fcc_offset;
  449: 
  450: match_count = PCRE_ERROR_NOMATCH;   /* A negative number */
  451: 
  452: active_states = (stateblock *)(workspace + 2);
  453: next_new_state = new_states = active_states + wscount;
  454: new_count = 0;
  455: 
  456: first_op = this_start_code + 1 + LINK_SIZE +
  457:   ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
  458:     *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
  459:     ? IMM2_SIZE:0);
  460: 
  461: /* The first thing in any (sub) pattern is a bracket of some sort. Push all
  462: the alternative states onto the list, and find out where the end is. This
  463: makes it possible to use this function recursively, when we want to stop at a
  464: matching internal ket rather than at the end.
  465: 
  466: If the first opcode in the first alternative is OP_REVERSE, we are dealing with
  467: a backward assertion. In that case, we have to find out the maximum amount to
  468: move back, and set up each alternative appropriately. */
  469: 
  470: if (*first_op == OP_REVERSE)
  471:   {
  472:   int max_back = 0;
  473:   int gone_back;
  474: 
  475:   end_code = this_start_code;
  476:   do
  477:     {
  478:     int back = GET(end_code, 2+LINK_SIZE);
  479:     if (back > max_back) max_back = back;
  480:     end_code += GET(end_code, 1);
  481:     }
  482:   while (*end_code == OP_ALT);
  483: 
  484:   /* If we can't go back the amount required for the longest lookbehind
  485:   pattern, go back as far as we can; some alternatives may still be viable. */
  486: 
  487: #ifdef SUPPORT_UTF
  488:   /* In character mode we have to step back character by character */
  489: 
  490:   if (utf)
  491:     {
  492:     for (gone_back = 0; gone_back < max_back; gone_back++)
  493:       {
  494:       if (current_subject <= start_subject) break;
  495:       current_subject--;
  496:       ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
  497:       }
  498:     }
  499:   else
  500: #endif
  501: 
  502:   /* In byte-mode we can do this quickly. */
  503: 
  504:     {
  505:     gone_back = (current_subject - max_back < start_subject)?
  506:       (int)(current_subject - start_subject) : max_back;
  507:     current_subject -= gone_back;
  508:     }
  509: 
  510:   /* Save the earliest consulted character */
  511: 
  512:   if (current_subject < md->start_used_ptr)
  513:     md->start_used_ptr = current_subject;
  514: 
  515:   /* Now we can process the individual branches. */
  516: 
  517:   end_code = this_start_code;
  518:   do
  519:     {
  520:     int back = GET(end_code, 2+LINK_SIZE);
  521:     if (back <= gone_back)
  522:       {
  523:       int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
  524:       ADD_NEW_DATA(-bstate, 0, gone_back - back);
  525:       }
  526:     end_code += GET(end_code, 1);
  527:     }
  528:   while (*end_code == OP_ALT);
  529:  }
  530: 
  531: /* This is the code for a "normal" subpattern (not a backward assertion). The
  532: start of a whole pattern is always one of these. If we are at the top level,
  533: we may be asked to restart matching from the same point that we reached for a
  534: previous partial match. We still have to scan through the top-level branches to
  535: find the end state. */
  536: 
  537: else
  538:   {
  539:   end_code = this_start_code;
  540: 
  541:   /* Restarting */
  542: 
  543:   if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
  544:     {
  545:     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
  546:     new_count = workspace[1];
  547:     if (!workspace[0])
  548:       memcpy(new_states, active_states, new_count * sizeof(stateblock));
  549:     }
  550: 
  551:   /* Not restarting */
  552: 
  553:   else
  554:     {
  555:     int length = 1 + LINK_SIZE +
  556:       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
  557:         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
  558:         ? IMM2_SIZE:0);
  559:     do
  560:       {
  561:       ADD_NEW((int)(end_code - start_code + length), 0);
  562:       end_code += GET(end_code, 1);
  563:       length = 1 + LINK_SIZE;
  564:       }
  565:     while (*end_code == OP_ALT);
  566:     }
  567:   }
  568: 
  569: workspace[0] = 0;    /* Bit indicating which vector is current */
  570: 
  571: DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
  572: 
  573: /* Loop for scanning the subject */
  574: 
  575: ptr = current_subject;
  576: for (;;)
  577:   {
  578:   int i, j;
  579:   int clen, dlen;
  580:   pcre_uint32 c, d;
  581:   int forced_fail = 0;
  582:   BOOL partial_newline = FALSE;
  583:   BOOL could_continue = reset_could_continue;
  584:   reset_could_continue = FALSE;
  585: 
  586:   /* Make the new state list into the active state list and empty the
  587:   new state list. */
  588: 
  589:   temp_states = active_states;
  590:   active_states = new_states;
  591:   new_states = temp_states;
  592:   active_count = new_count;
  593:   new_count = 0;
  594: 
  595:   workspace[0] ^= 1;              /* Remember for the restarting feature */
  596:   workspace[1] = active_count;
  597: 
  598: #ifdef PCRE_DEBUG
  599:   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
  600:   pchars(ptr, STRLEN_UC(ptr), stdout);
  601:   printf("\"\n");
  602: 
  603:   printf("%.*sActive states: ", rlevel*2-2, SP);
  604:   for (i = 0; i < active_count; i++)
  605:     printf("%d/%d ", active_states[i].offset, active_states[i].count);
  606:   printf("\n");
  607: #endif
  608: 
  609:   /* Set the pointers for adding new states */
  610: 
  611:   next_active_state = active_states + active_count;
  612:   next_new_state = new_states;
  613: 
  614:   /* Load the current character from the subject outside the loop, as many
  615:   different states may want to look at it, and we assume that at least one
  616:   will. */
  617: 
  618:   if (ptr < end_subject)
  619:     {
  620:     clen = 1;        /* Number of data items in the character */
  621: #ifdef SUPPORT_UTF
  622:     GETCHARLENTEST(c, ptr, clen);
  623: #else
  624:     c = *ptr;
  625: #endif  /* SUPPORT_UTF */
  626:     }
  627:   else
  628:     {
  629:     clen = 0;        /* This indicates the end of the subject */
  630:     c = NOTACHAR;    /* This value should never actually be used */
  631:     }
  632: 
  633:   /* Scan up the active states and act on each one. The result of an action
  634:   may be to add more states to the currently active list (e.g. on hitting a
  635:   parenthesis) or it may be to put states on the new list, for considering
  636:   when we move the character pointer on. */
  637: 
  638:   for (i = 0; i < active_count; i++)
  639:     {
  640:     stateblock *current_state = active_states + i;
  641:     BOOL caseless = FALSE;
  642:     const pcre_uchar *code;
  643:     int state_offset = current_state->offset;
  644:     int codevalue, rrc;
  645:     int count;
  646: 
  647: #ifdef PCRE_DEBUG
  648:     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
  649:     if (clen == 0) printf("EOL\n");
  650:       else if (c > 32 && c < 127) printf("'%c'\n", c);
  651:         else printf("0x%02x\n", c);
  652: #endif
  653: 
  654:     /* A negative offset is a special case meaning "hold off going to this
  655:     (negated) state until the number of characters in the data field have
  656:     been skipped". If the could_continue flag was passed over from a previous
  657:     state, arrange for it to be passed on. */
  658: 
  659:     if (state_offset < 0)
  660:       {
  661:       if (current_state->data > 0)
  662:         {
  663:         DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
  664:         ADD_NEW_DATA(state_offset, current_state->count,
  665:           current_state->data - 1);
  666:         if (could_continue) reset_could_continue = TRUE;
  667:         continue;
  668:         }
  669:       else
  670:         {
  671:         current_state->offset = state_offset = -state_offset;
  672:         }
  673:       }
  674: 
  675:     /* Check for a duplicate state with the same count, and skip if found.
  676:     See the note at the head of this module about the possibility of improving
  677:     performance here. */
  678: 
  679:     for (j = 0; j < i; j++)
  680:       {
  681:       if (active_states[j].offset == state_offset &&
  682:           active_states[j].count == current_state->count)
  683:         {
  684:         DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
  685:         goto NEXT_ACTIVE_STATE;
  686:         }
  687:       }
  688: 
  689:     /* The state offset is the offset to the opcode */
  690: 
  691:     code = start_code + state_offset;
  692:     codevalue = *code;
  693: 
  694:     /* If this opcode inspects a character, but we are at the end of the
  695:     subject, remember the fact for use when testing for a partial match. */
  696: 
  697:     if (clen == 0 && poptable[codevalue] != 0)
  698:       could_continue = TRUE;
  699: 
  700:     /* If this opcode is followed by an inline character, load it. It is
  701:     tempting to test for the presence of a subject character here, but that
  702:     is wrong, because sometimes zero repetitions of the subject are
  703:     permitted.
  704: 
  705:     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
  706:     argument that is not a data character - but is always one byte long because
  707:     the values are small. We have to take special action to deal with \P, \p,
  708:     \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
  709:     these ones to new opcodes. */
  710: 
  711:     if (coptable[codevalue] > 0)
  712:       {
  713:       dlen = 1;
  714: #ifdef SUPPORT_UTF
  715:       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
  716: #endif  /* SUPPORT_UTF */
  717:       d = code[coptable[codevalue]];
  718:       if (codevalue >= OP_TYPESTAR)
  719:         {
  720:         switch(d)
  721:           {
  722:           case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
  723:           case OP_NOTPROP:
  724:           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
  725:           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
  726:           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
  727:           case OP_NOT_HSPACE:
  728:           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
  729:           case OP_NOT_VSPACE:
  730:           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
  731:           default: break;
  732:           }
  733:         }
  734:       }
  735:     else
  736:       {
  737:       dlen = 0;         /* Not strictly necessary, but compilers moan */
  738:       d = NOTACHAR;     /* if these variables are not set. */
  739:       }
  740: 
  741: 
  742:     /* Now process the individual opcodes */
  743: 
  744:     switch (codevalue)
  745:       {
  746: /* ========================================================================== */
  747:       /* These cases are never obeyed. This is a fudge that causes a compile-
  748:       time error if the vectors coptable or poptable, which are indexed by
  749:       opcode, are not the correct length. It seems to be the only way to do
  750:       such a check at compile time, as the sizeof() operator does not work
  751:       in the C preprocessor. */
  752: 
  753:       case OP_TABLE_LENGTH:
  754:       case OP_TABLE_LENGTH +
  755:         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
  756:          (sizeof(poptable) == OP_TABLE_LENGTH)):
  757:       break;
  758: 
  759: /* ========================================================================== */
  760:       /* Reached a closing bracket. If not at the end of the pattern, carry
  761:       on with the next opcode. For repeating opcodes, also add the repeat
  762:       state. Note that KETRPOS will always be encountered at the end of the
  763:       subpattern, because the possessive subpattern repeats are always handled
  764:       using recursive calls. Thus, it never adds any new states.
  765: 
  766:       At the end of the (sub)pattern, unless we have an empty string and
  767:       PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
  768:       start of the subject, save the match data, shifting up all previous
  769:       matches so we always have the longest first. */
  770: 
  771:       case OP_KET:
  772:       case OP_KETRMIN:
  773:       case OP_KETRMAX:
  774:       case OP_KETRPOS:
  775:       if (code != end_code)
  776:         {
  777:         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
  778:         if (codevalue != OP_KET)
  779:           {
  780:           ADD_ACTIVE(state_offset - GET(code, 1), 0);
  781:           }
  782:         }
  783:       else
  784:         {
  785:         if (ptr > current_subject ||
  786:             ((md->moptions & PCRE_NOTEMPTY) == 0 &&
  787:               ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
  788:                 current_subject > start_subject + md->start_offset)))
  789:           {
  790:           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
  791:             else if (match_count > 0 && ++match_count * 2 > offsetcount)
  792:               match_count = 0;
  793:           count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
  794:           if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
  795:           if (offsetcount >= 2)
  796:             {
  797:             offsets[0] = (int)(current_subject - start_subject);
  798:             offsets[1] = (int)(ptr - start_subject);
  799:             DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
  800:               offsets[1] - offsets[0], (char *)current_subject));
  801:             }
  802:           if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
  803:             {
  804:             DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
  805:               "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
  806:               match_count, rlevel*2-2, SP));
  807:             return match_count;
  808:             }
  809:           }
  810:         }
  811:       break;
  812: 
  813: /* ========================================================================== */
  814:       /* These opcodes add to the current list of states without looking
  815:       at the current character. */
  816: 
  817:       /*-----------------------------------------------------------------*/
  818:       case OP_ALT:
  819:       do { code += GET(code, 1); } while (*code == OP_ALT);
  820:       ADD_ACTIVE((int)(code - start_code), 0);
  821:       break;
  822: 
  823:       /*-----------------------------------------------------------------*/
  824:       case OP_BRA:
  825:       case OP_SBRA:
  826:       do
  827:         {
  828:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
  829:         code += GET(code, 1);
  830:         }
  831:       while (*code == OP_ALT);
  832:       break;
  833: 
  834:       /*-----------------------------------------------------------------*/
  835:       case OP_CBRA:
  836:       case OP_SCBRA:
  837:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
  838:       code += GET(code, 1);
  839:       while (*code == OP_ALT)
  840:         {
  841:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
  842:         code += GET(code, 1);
  843:         }
  844:       break;
  845: 
  846:       /*-----------------------------------------------------------------*/
  847:       case OP_BRAZERO:
  848:       case OP_BRAMINZERO:
  849:       ADD_ACTIVE(state_offset + 1, 0);
  850:       code += 1 + GET(code, 2);
  851:       while (*code == OP_ALT) code += GET(code, 1);
  852:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
  853:       break;
  854: 
  855:       /*-----------------------------------------------------------------*/
  856:       case OP_SKIPZERO:
  857:       code += 1 + GET(code, 2);
  858:       while (*code == OP_ALT) code += GET(code, 1);
  859:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
  860:       break;
  861: 
  862:       /*-----------------------------------------------------------------*/
  863:       case OP_CIRC:
  864:       if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
  865:         { ADD_ACTIVE(state_offset + 1, 0); }
  866:       break;
  867: 
  868:       /*-----------------------------------------------------------------*/
  869:       case OP_CIRCM:
  870:       if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
  871:           (ptr != end_subject && WAS_NEWLINE(ptr)))
  872:         { ADD_ACTIVE(state_offset + 1, 0); }
  873:       break;
  874: 
  875:       /*-----------------------------------------------------------------*/
  876:       case OP_EOD:
  877:       if (ptr >= end_subject)
  878:         {
  879:         if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
  880:           could_continue = TRUE;
  881:         else { ADD_ACTIVE(state_offset + 1, 0); }
  882:         }
  883:       break;
  884: 
  885:       /*-----------------------------------------------------------------*/
  886:       case OP_SOD:
  887:       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
  888:       break;
  889: 
  890:       /*-----------------------------------------------------------------*/
  891:       case OP_SOM:
  892:       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
  893:       break;
  894: 
  895: 
  896: /* ========================================================================== */
  897:       /* These opcodes inspect the next subject character, and sometimes
  898:       the previous one as well, but do not have an argument. The variable
  899:       clen contains the length of the current character and is zero if we are
  900:       at the end of the subject. */
  901: 
  902:       /*-----------------------------------------------------------------*/
  903:       case OP_ANY:
  904:       if (clen > 0 && !IS_NEWLINE(ptr))
  905:         {
  906:         if (ptr + 1 >= md->end_subject &&
  907:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
  908:             NLBLOCK->nltype == NLTYPE_FIXED &&
  909:             NLBLOCK->nllen == 2 &&
  910:             c == NLBLOCK->nl[0])
  911:           {
  912:           could_continue = partial_newline = TRUE;
  913:           }
  914:         else
  915:           {
  916:           ADD_NEW(state_offset + 1, 0);
  917:           }
  918:         }
  919:       break;
  920: 
  921:       /*-----------------------------------------------------------------*/
  922:       case OP_ALLANY:
  923:       if (clen > 0)
  924:         { ADD_NEW(state_offset + 1, 0); }
  925:       break;
  926: 
  927:       /*-----------------------------------------------------------------*/
  928:       case OP_EODN:
  929:       if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
  930:         could_continue = TRUE;
  931:       else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
  932:         { ADD_ACTIVE(state_offset + 1, 0); }
  933:       break;
  934: 
  935:       /*-----------------------------------------------------------------*/
  936:       case OP_DOLL:
  937:       if ((md->moptions & PCRE_NOTEOL) == 0)
  938:         {
  939:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
  940:           could_continue = TRUE;
  941:         else if (clen == 0 ||
  942:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
  943:                (ptr == end_subject - md->nllen)
  944:             ))
  945:           { ADD_ACTIVE(state_offset + 1, 0); }
  946:         else if (ptr + 1 >= md->end_subject &&
  947:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
  948:                  NLBLOCK->nltype == NLTYPE_FIXED &&
  949:                  NLBLOCK->nllen == 2 &&
  950:                  c == NLBLOCK->nl[0])
  951:           {
  952:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
  953:             {
  954:             reset_could_continue = TRUE;
  955:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
  956:             }
  957:           else could_continue = partial_newline = TRUE;
  958:           }
  959:         }
  960:       break;
  961: 
  962:       /*-----------------------------------------------------------------*/
  963:       case OP_DOLLM:
  964:       if ((md->moptions & PCRE_NOTEOL) == 0)
  965:         {
  966:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
  967:           could_continue = TRUE;
  968:         else if (clen == 0 ||
  969:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
  970:           { ADD_ACTIVE(state_offset + 1, 0); }
  971:         else if (ptr + 1 >= md->end_subject &&
  972:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
  973:                  NLBLOCK->nltype == NLTYPE_FIXED &&
  974:                  NLBLOCK->nllen == 2 &&
  975:                  c == NLBLOCK->nl[0])
  976:           {
  977:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
  978:             {
  979:             reset_could_continue = TRUE;
  980:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
  981:             }
  982:           else could_continue = partial_newline = TRUE;
  983:           }
  984:         }
  985:       else if (IS_NEWLINE(ptr))
  986:         { ADD_ACTIVE(state_offset + 1, 0); }
  987:       break;
  988: 
  989:       /*-----------------------------------------------------------------*/
  990: 
  991:       case OP_DIGIT:
  992:       case OP_WHITESPACE:
  993:       case OP_WORDCHAR:
  994:       if (clen > 0 && c < 256 &&
  995:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
  996:         { ADD_NEW(state_offset + 1, 0); }
  997:       break;
  998: 
  999:       /*-----------------------------------------------------------------*/
 1000:       case OP_NOT_DIGIT:
 1001:       case OP_NOT_WHITESPACE:
 1002:       case OP_NOT_WORDCHAR:
 1003:       if (clen > 0 && (c >= 256 ||
 1004:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
 1005:         { ADD_NEW(state_offset + 1, 0); }
 1006:       break;
 1007: 
 1008:       /*-----------------------------------------------------------------*/
 1009:       case OP_WORD_BOUNDARY:
 1010:       case OP_NOT_WORD_BOUNDARY:
 1011:         {
 1012:         int left_word, right_word;
 1013: 
 1014:         if (ptr > start_subject)
 1015:           {
 1016:           const pcre_uchar *temp = ptr - 1;
 1017:           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
 1018: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 1019:           if (utf) { BACKCHAR(temp); }
 1020: #endif
 1021:           GETCHARTEST(d, temp);
 1022: #ifdef SUPPORT_UCP
 1023:           if ((md->poptions & PCRE_UCP) != 0)
 1024:             {
 1025:             if (d == '_') left_word = TRUE; else
 1026:               {
 1027:               int cat = UCD_CATEGORY(d);
 1028:               left_word = (cat == ucp_L || cat == ucp_N);
 1029:               }
 1030:             }
 1031:           else
 1032: #endif
 1033:           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
 1034:           }
 1035:         else left_word = FALSE;
 1036: 
 1037:         if (clen > 0)
 1038:           {
 1039: #ifdef SUPPORT_UCP
 1040:           if ((md->poptions & PCRE_UCP) != 0)
 1041:             {
 1042:             if (c == '_') right_word = TRUE; else
 1043:               {
 1044:               int cat = UCD_CATEGORY(c);
 1045:               right_word = (cat == ucp_L || cat == ucp_N);
 1046:               }
 1047:             }
 1048:           else
 1049: #endif
 1050:           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
 1051:           }
 1052:         else right_word = FALSE;
 1053: 
 1054:         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
 1055:           { ADD_ACTIVE(state_offset + 1, 0); }
 1056:         }
 1057:       break;
 1058: 
 1059: 
 1060:       /*-----------------------------------------------------------------*/
 1061:       /* Check the next character by Unicode property. We will get here only
 1062:       if the support is in the binary; otherwise a compile-time error occurs.
 1063:       */
 1064: 
 1065: #ifdef SUPPORT_UCP
 1066:       case OP_PROP:
 1067:       case OP_NOTPROP:
 1068:       if (clen > 0)
 1069:         {
 1070:         BOOL OK;
 1071:         const pcre_uint32 *cp;
 1072:         const ucd_record * prop = GET_UCD(c);
 1073:         switch(code[1])
 1074:           {
 1075:           case PT_ANY:
 1076:           OK = TRUE;
 1077:           break;
 1078: 
 1079:           case PT_LAMP:
 1080:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
 1081:                prop->chartype == ucp_Lt;
 1082:           break;
 1083: 
 1084:           case PT_GC:
 1085:           OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
 1086:           break;
 1087: 
 1088:           case PT_PC:
 1089:           OK = prop->chartype == code[2];
 1090:           break;
 1091: 
 1092:           case PT_SC:
 1093:           OK = prop->script == code[2];
 1094:           break;
 1095: 
 1096:           /* These are specials for combination cases. */
 1097: 
 1098:           case PT_ALNUM:
 1099:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1100:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
 1101:           break;
 1102: 
 1103:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 1104:           which means that Perl space and POSIX space are now identical. PCRE
 1105:           was changed at release 8.34. */
 1106: 
 1107:           case PT_SPACE:    /* Perl space */
 1108:           case PT_PXSPACE:  /* POSIX space */
 1109:           switch(c)
 1110:             {
 1111:             HSPACE_CASES:
 1112:             VSPACE_CASES:
 1113:             OK = TRUE;
 1114:             break;
 1115: 
 1116:             default:
 1117:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
 1118:             break;
 1119:             }
 1120:           break;
 1121: 
 1122:           case PT_WORD:
 1123:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1124:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
 1125:                c == CHAR_UNDERSCORE;
 1126:           break;
 1127: 
 1128:           case PT_CLIST:
 1129:           cp = PRIV(ucd_caseless_sets) + code[2];
 1130:           for (;;)
 1131:             {
 1132:             if (c < *cp) { OK = FALSE; break; }
 1133:             if (c == *cp++) { OK = TRUE; break; }
 1134:             }
 1135:           break;
 1136: 
 1137:           case PT_UCNC:
 1138:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 1139:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 1140:                c >= 0xe000;
 1141:           break;
 1142: 
 1143:           /* Should never occur, but keep compilers from grumbling. */
 1144: 
 1145:           default:
 1146:           OK = codevalue != OP_PROP;
 1147:           break;
 1148:           }
 1149: 
 1150:         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
 1151:         }
 1152:       break;
 1153: #endif
 1154: 
 1155: 
 1156: 
 1157: /* ========================================================================== */
 1158:       /* These opcodes likewise inspect the subject character, but have an
 1159:       argument that is not a data character. It is one of these opcodes:
 1160:       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
 1161:       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
 1162: 
 1163:       case OP_TYPEPLUS:
 1164:       case OP_TYPEMINPLUS:
 1165:       case OP_TYPEPOSPLUS:
 1166:       count = current_state->count;  /* Already matched */
 1167:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 1168:       if (clen > 0)
 1169:         {
 1170:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
 1171:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
 1172:             NLBLOCK->nltype == NLTYPE_FIXED &&
 1173:             NLBLOCK->nllen == 2 &&
 1174:             c == NLBLOCK->nl[0])
 1175:           {
 1176:           could_continue = partial_newline = TRUE;
 1177:           }
 1178:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 1179:             (c < 256 &&
 1180:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 1181:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 1182:           {
 1183:           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
 1184:             {
 1185:             active_count--;            /* Remove non-match possibility */
 1186:             next_active_state--;
 1187:             }
 1188:           count++;
 1189:           ADD_NEW(state_offset, count);
 1190:           }
 1191:         }
 1192:       break;
 1193: 
 1194:       /*-----------------------------------------------------------------*/
 1195:       case OP_TYPEQUERY:
 1196:       case OP_TYPEMINQUERY:
 1197:       case OP_TYPEPOSQUERY:
 1198:       ADD_ACTIVE(state_offset + 2, 0);
 1199:       if (clen > 0)
 1200:         {
 1201:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
 1202:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
 1203:             NLBLOCK->nltype == NLTYPE_FIXED &&
 1204:             NLBLOCK->nllen == 2 &&
 1205:             c == NLBLOCK->nl[0])
 1206:           {
 1207:           could_continue = partial_newline = TRUE;
 1208:           }
 1209:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 1210:             (c < 256 &&
 1211:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 1212:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 1213:           {
 1214:           if (codevalue == OP_TYPEPOSQUERY)
 1215:             {
 1216:             active_count--;            /* Remove non-match possibility */
 1217:             next_active_state--;
 1218:             }
 1219:           ADD_NEW(state_offset + 2, 0);
 1220:           }
 1221:         }
 1222:       break;
 1223: 
 1224:       /*-----------------------------------------------------------------*/
 1225:       case OP_TYPESTAR:
 1226:       case OP_TYPEMINSTAR:
 1227:       case OP_TYPEPOSSTAR:
 1228:       ADD_ACTIVE(state_offset + 2, 0);
 1229:       if (clen > 0)
 1230:         {
 1231:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
 1232:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
 1233:             NLBLOCK->nltype == NLTYPE_FIXED &&
 1234:             NLBLOCK->nllen == 2 &&
 1235:             c == NLBLOCK->nl[0])
 1236:           {
 1237:           could_continue = partial_newline = TRUE;
 1238:           }
 1239:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 1240:             (c < 256 &&
 1241:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 1242:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 1243:           {
 1244:           if (codevalue == OP_TYPEPOSSTAR)
 1245:             {
 1246:             active_count--;            /* Remove non-match possibility */
 1247:             next_active_state--;
 1248:             }
 1249:           ADD_NEW(state_offset, 0);
 1250:           }
 1251:         }
 1252:       break;
 1253: 
 1254:       /*-----------------------------------------------------------------*/
 1255:       case OP_TYPEEXACT:
 1256:       count = current_state->count;  /* Number already matched */
 1257:       if (clen > 0)
 1258:         {
 1259:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
 1260:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
 1261:             NLBLOCK->nltype == NLTYPE_FIXED &&
 1262:             NLBLOCK->nllen == 2 &&
 1263:             c == NLBLOCK->nl[0])
 1264:           {
 1265:           could_continue = partial_newline = TRUE;
 1266:           }
 1267:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 1268:             (c < 256 &&
 1269:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 1270:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 1271:           {
 1272:           if (++count >= (int)GET2(code, 1))
 1273:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
 1274:           else
 1275:             { ADD_NEW(state_offset, count); }
 1276:           }
 1277:         }
 1278:       break;
 1279: 
 1280:       /*-----------------------------------------------------------------*/
 1281:       case OP_TYPEUPTO:
 1282:       case OP_TYPEMINUPTO:
 1283:       case OP_TYPEPOSUPTO:
 1284:       ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
 1285:       count = current_state->count;  /* Number already matched */
 1286:       if (clen > 0)
 1287:         {
 1288:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
 1289:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
 1290:             NLBLOCK->nltype == NLTYPE_FIXED &&
 1291:             NLBLOCK->nllen == 2 &&
 1292:             c == NLBLOCK->nl[0])
 1293:           {
 1294:           could_continue = partial_newline = TRUE;
 1295:           }
 1296:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 1297:             (c < 256 &&
 1298:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 1299:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 1300:           {
 1301:           if (codevalue == OP_TYPEPOSUPTO)
 1302:             {
 1303:             active_count--;           /* Remove non-match possibility */
 1304:             next_active_state--;
 1305:             }
 1306:           if (++count >= (int)GET2(code, 1))
 1307:             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
 1308:           else
 1309:             { ADD_NEW(state_offset, count); }
 1310:           }
 1311:         }
 1312:       break;
 1313: 
 1314: /* ========================================================================== */
 1315:       /* These are virtual opcodes that are used when something like
 1316:       OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, OP_HSPACE,
 1317:       OP_NOT_HSPACE, OP_VSPACE, or OP_NOT_VSPACE as its argument. It keeps the
 1318:       code above fast for the other cases. The argument is in the d variable. */
 1319: 
 1320: #ifdef SUPPORT_UCP
 1321:       case OP_PROP_EXTRA + OP_TYPEPLUS:
 1322:       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
 1323:       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
 1324:       count = current_state->count;           /* Already matched */
 1325:       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
 1326:       if (clen > 0)
 1327:         {
 1328:         BOOL OK;
 1329:         const pcre_uint32 *cp;
 1330:         const ucd_record * prop = GET_UCD(c);
 1331:         switch(code[2])
 1332:           {
 1333:           case PT_ANY:
 1334:           OK = TRUE;
 1335:           break;
 1336: 
 1337:           case PT_LAMP:
 1338:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
 1339:             prop->chartype == ucp_Lt;
 1340:           break;
 1341: 
 1342:           case PT_GC:
 1343:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
 1344:           break;
 1345: 
 1346:           case PT_PC:
 1347:           OK = prop->chartype == code[3];
 1348:           break;
 1349: 
 1350:           case PT_SC:
 1351:           OK = prop->script == code[3];
 1352:           break;
 1353: 
 1354:           /* These are specials for combination cases. */
 1355: 
 1356:           case PT_ALNUM:
 1357:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1358:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
 1359:           break;
 1360: 
 1361:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 1362:           which means that Perl space and POSIX space are now identical. PCRE
 1363:           was changed at release 8.34. */
 1364: 
 1365:           case PT_SPACE:    /* Perl space */
 1366:           case PT_PXSPACE:  /* POSIX space */
 1367:           switch(c)
 1368:             {
 1369:             HSPACE_CASES:
 1370:             VSPACE_CASES:
 1371:             OK = TRUE;
 1372:             break;
 1373: 
 1374:             default:
 1375:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
 1376:             break;
 1377:             }
 1378:           break;
 1379: 
 1380:           case PT_WORD:
 1381:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1382:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
 1383:                c == CHAR_UNDERSCORE;
 1384:           break;
 1385: 
 1386:           case PT_CLIST:
 1387:           cp = PRIV(ucd_caseless_sets) + code[3];
 1388:           for (;;)
 1389:             {
 1390:             if (c < *cp) { OK = FALSE; break; }
 1391:             if (c == *cp++) { OK = TRUE; break; }
 1392:             }
 1393:           break;
 1394: 
 1395:           case PT_UCNC:
 1396:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 1397:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 1398:                c >= 0xe000;
 1399:           break;
 1400: 
 1401:           /* Should never occur, but keep compilers from grumbling. */
 1402: 
 1403:           default:
 1404:           OK = codevalue != OP_PROP;
 1405:           break;
 1406:           }
 1407: 
 1408:         if (OK == (d == OP_PROP))
 1409:           {
 1410:           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
 1411:             {
 1412:             active_count--;           /* Remove non-match possibility */
 1413:             next_active_state--;
 1414:             }
 1415:           count++;
 1416:           ADD_NEW(state_offset, count);
 1417:           }
 1418:         }
 1419:       break;
 1420: 
 1421:       /*-----------------------------------------------------------------*/
 1422:       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
 1423:       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
 1424:       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
 1425:       count = current_state->count;  /* Already matched */
 1426:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 1427:       if (clen > 0)
 1428:         {
 1429:         int lgb, rgb;
 1430:         const pcre_uchar *nptr = ptr + clen;
 1431:         int ncount = 0;
 1432:         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
 1433:           {
 1434:           active_count--;           /* Remove non-match possibility */
 1435:           next_active_state--;
 1436:           }
 1437:         lgb = UCD_GRAPHBREAK(c);
 1438:         while (nptr < end_subject)
 1439:           {
 1440:           dlen = 1;
 1441:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
 1442:           rgb = UCD_GRAPHBREAK(d);
 1443:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 1444:           ncount++;
 1445:           lgb = rgb;
 1446:           nptr += dlen;
 1447:           }
 1448:         count++;
 1449:         ADD_NEW_DATA(-state_offset, count, ncount);
 1450:         }
 1451:       break;
 1452: #endif
 1453: 
 1454:       /*-----------------------------------------------------------------*/
 1455:       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
 1456:       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
 1457:       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
 1458:       count = current_state->count;  /* Already matched */
 1459:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 1460:       if (clen > 0)
 1461:         {
 1462:         int ncount = 0;
 1463:         switch (c)
 1464:           {
 1465:           case CHAR_VT:
 1466:           case CHAR_FF:
 1467:           case CHAR_NEL:
 1468: #ifndef EBCDIC
 1469:           case 0x2028:
 1470:           case 0x2029:
 1471: #endif  /* Not EBCDIC */
 1472:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
 1473:           goto ANYNL01;
 1474: 
 1475:           case CHAR_CR:
 1476:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
 1477:           /* Fall through */
 1478: 
 1479:           ANYNL01:
 1480:           case CHAR_LF:
 1481:           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
 1482:             {
 1483:             active_count--;           /* Remove non-match possibility */
 1484:             next_active_state--;
 1485:             }
 1486:           count++;
 1487:           ADD_NEW_DATA(-state_offset, count, ncount);
 1488:           break;
 1489: 
 1490:           default:
 1491:           break;
 1492:           }
 1493:         }
 1494:       break;
 1495: 
 1496:       /*-----------------------------------------------------------------*/
 1497:       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
 1498:       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
 1499:       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
 1500:       count = current_state->count;  /* Already matched */
 1501:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 1502:       if (clen > 0)
 1503:         {
 1504:         BOOL OK;
 1505:         switch (c)
 1506:           {
 1507:           VSPACE_CASES:
 1508:           OK = TRUE;
 1509:           break;
 1510: 
 1511:           default:
 1512:           OK = FALSE;
 1513:           break;
 1514:           }
 1515: 
 1516:         if (OK == (d == OP_VSPACE))
 1517:           {
 1518:           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
 1519:             {
 1520:             active_count--;           /* Remove non-match possibility */
 1521:             next_active_state--;
 1522:             }
 1523:           count++;
 1524:           ADD_NEW_DATA(-state_offset, count, 0);
 1525:           }
 1526:         }
 1527:       break;
 1528: 
 1529:       /*-----------------------------------------------------------------*/
 1530:       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
 1531:       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
 1532:       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
 1533:       count = current_state->count;  /* Already matched */
 1534:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 1535:       if (clen > 0)
 1536:         {
 1537:         BOOL OK;
 1538:         switch (c)
 1539:           {
 1540:           HSPACE_CASES:
 1541:           OK = TRUE;
 1542:           break;
 1543: 
 1544:           default:
 1545:           OK = FALSE;
 1546:           break;
 1547:           }
 1548: 
 1549:         if (OK == (d == OP_HSPACE))
 1550:           {
 1551:           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
 1552:             {
 1553:             active_count--;           /* Remove non-match possibility */
 1554:             next_active_state--;
 1555:             }
 1556:           count++;
 1557:           ADD_NEW_DATA(-state_offset, count, 0);
 1558:           }
 1559:         }
 1560:       break;
 1561: 
 1562:       /*-----------------------------------------------------------------*/
 1563: #ifdef SUPPORT_UCP
 1564:       case OP_PROP_EXTRA + OP_TYPEQUERY:
 1565:       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
 1566:       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
 1567:       count = 4;
 1568:       goto QS1;
 1569: 
 1570:       case OP_PROP_EXTRA + OP_TYPESTAR:
 1571:       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
 1572:       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
 1573:       count = 0;
 1574: 
 1575:       QS1:
 1576: 
 1577:       ADD_ACTIVE(state_offset + 4, 0);
 1578:       if (clen > 0)
 1579:         {
 1580:         BOOL OK;
 1581:         const pcre_uint32 *cp;
 1582:         const ucd_record * prop = GET_UCD(c);
 1583:         switch(code[2])
 1584:           {
 1585:           case PT_ANY:
 1586:           OK = TRUE;
 1587:           break;
 1588: 
 1589:           case PT_LAMP:
 1590:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
 1591:             prop->chartype == ucp_Lt;
 1592:           break;
 1593: 
 1594:           case PT_GC:
 1595:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
 1596:           break;
 1597: 
 1598:           case PT_PC:
 1599:           OK = prop->chartype == code[3];
 1600:           break;
 1601: 
 1602:           case PT_SC:
 1603:           OK = prop->script == code[3];
 1604:           break;
 1605: 
 1606:           /* These are specials for combination cases. */
 1607: 
 1608:           case PT_ALNUM:
 1609:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1610:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
 1611:           break;
 1612: 
 1613:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 1614:           which means that Perl space and POSIX space are now identical. PCRE
 1615:           was changed at release 8.34. */
 1616: 
 1617:           case PT_SPACE:    /* Perl space */
 1618:           case PT_PXSPACE:  /* POSIX space */
 1619:           switch(c)
 1620:             {
 1621:             HSPACE_CASES:
 1622:             VSPACE_CASES:
 1623:             OK = TRUE;
 1624:             break;
 1625: 
 1626:             default:
 1627:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
 1628:             break;
 1629:             }
 1630:           break;
 1631: 
 1632:           case PT_WORD:
 1633:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1634:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
 1635:                c == CHAR_UNDERSCORE;
 1636:           break;
 1637: 
 1638:           case PT_CLIST:
 1639:           cp = PRIV(ucd_caseless_sets) + code[3];
 1640:           for (;;)
 1641:             {
 1642:             if (c < *cp) { OK = FALSE; break; }
 1643:             if (c == *cp++) { OK = TRUE; break; }
 1644:             }
 1645:           break;
 1646: 
 1647:           case PT_UCNC:
 1648:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 1649:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 1650:                c >= 0xe000;
 1651:           break;
 1652: 
 1653:           /* Should never occur, but keep compilers from grumbling. */
 1654: 
 1655:           default:
 1656:           OK = codevalue != OP_PROP;
 1657:           break;
 1658:           }
 1659: 
 1660:         if (OK == (d == OP_PROP))
 1661:           {
 1662:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
 1663:               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
 1664:             {
 1665:             active_count--;           /* Remove non-match possibility */
 1666:             next_active_state--;
 1667:             }
 1668:           ADD_NEW(state_offset + count, 0);
 1669:           }
 1670:         }
 1671:       break;
 1672: 
 1673:       /*-----------------------------------------------------------------*/
 1674:       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
 1675:       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
 1676:       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
 1677:       count = 2;
 1678:       goto QS2;
 1679: 
 1680:       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
 1681:       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
 1682:       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
 1683:       count = 0;
 1684: 
 1685:       QS2:
 1686: 
 1687:       ADD_ACTIVE(state_offset + 2, 0);
 1688:       if (clen > 0)
 1689:         {
 1690:         int lgb, rgb;
 1691:         const pcre_uchar *nptr = ptr + clen;
 1692:         int ncount = 0;
 1693:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
 1694:             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
 1695:           {
 1696:           active_count--;           /* Remove non-match possibility */
 1697:           next_active_state--;
 1698:           }
 1699:         lgb = UCD_GRAPHBREAK(c);
 1700:         while (nptr < end_subject)
 1701:           {
 1702:           dlen = 1;
 1703:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
 1704:           rgb = UCD_GRAPHBREAK(d);
 1705:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 1706:           ncount++;
 1707:           lgb = rgb;
 1708:           nptr += dlen;
 1709:           }
 1710:         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
 1711:         }
 1712:       break;
 1713: #endif
 1714: 
 1715:       /*-----------------------------------------------------------------*/
 1716:       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
 1717:       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
 1718:       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
 1719:       count = 2;
 1720:       goto QS3;
 1721: 
 1722:       case OP_ANYNL_EXTRA + OP_TYPESTAR:
 1723:       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
 1724:       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
 1725:       count = 0;
 1726: 
 1727:       QS3:
 1728:       ADD_ACTIVE(state_offset + 2, 0);
 1729:       if (clen > 0)
 1730:         {
 1731:         int ncount = 0;
 1732:         switch (c)
 1733:           {
 1734:           case CHAR_VT:
 1735:           case CHAR_FF:
 1736:           case CHAR_NEL:
 1737: #ifndef EBCDIC
 1738:           case 0x2028:
 1739:           case 0x2029:
 1740: #endif  /* Not EBCDIC */
 1741:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
 1742:           goto ANYNL02;
 1743: 
 1744:           case CHAR_CR:
 1745:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
 1746:           /* Fall through */
 1747: 
 1748:           ANYNL02:
 1749:           case CHAR_LF:
 1750:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
 1751:               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
 1752:             {
 1753:             active_count--;           /* Remove non-match possibility */
 1754:             next_active_state--;
 1755:             }
 1756:           ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
 1757:           break;
 1758: 
 1759:           default:
 1760:           break;
 1761:           }
 1762:         }
 1763:       break;
 1764: 
 1765:       /*-----------------------------------------------------------------*/
 1766:       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
 1767:       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
 1768:       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
 1769:       count = 2;
 1770:       goto QS4;
 1771: 
 1772:       case OP_VSPACE_EXTRA + OP_TYPESTAR:
 1773:       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
 1774:       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
 1775:       count = 0;
 1776: 
 1777:       QS4:
 1778:       ADD_ACTIVE(state_offset + 2, 0);
 1779:       if (clen > 0)
 1780:         {
 1781:         BOOL OK;
 1782:         switch (c)
 1783:           {
 1784:           VSPACE_CASES:
 1785:           OK = TRUE;
 1786:           break;
 1787: 
 1788:           default:
 1789:           OK = FALSE;
 1790:           break;
 1791:           }
 1792:         if (OK == (d == OP_VSPACE))
 1793:           {
 1794:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
 1795:               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
 1796:             {
 1797:             active_count--;           /* Remove non-match possibility */
 1798:             next_active_state--;
 1799:             }
 1800:           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
 1801:           }
 1802:         }
 1803:       break;
 1804: 
 1805:       /*-----------------------------------------------------------------*/
 1806:       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
 1807:       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
 1808:       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
 1809:       count = 2;
 1810:       goto QS5;
 1811: 
 1812:       case OP_HSPACE_EXTRA + OP_TYPESTAR:
 1813:       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
 1814:       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
 1815:       count = 0;
 1816: 
 1817:       QS5:
 1818:       ADD_ACTIVE(state_offset + 2, 0);
 1819:       if (clen > 0)
 1820:         {
 1821:         BOOL OK;
 1822:         switch (c)
 1823:           {
 1824:           HSPACE_CASES:
 1825:           OK = TRUE;
 1826:           break;
 1827: 
 1828:           default:
 1829:           OK = FALSE;
 1830:           break;
 1831:           }
 1832: 
 1833:         if (OK == (d == OP_HSPACE))
 1834:           {
 1835:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
 1836:               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
 1837:             {
 1838:             active_count--;           /* Remove non-match possibility */
 1839:             next_active_state--;
 1840:             }
 1841:           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
 1842:           }
 1843:         }
 1844:       break;
 1845: 
 1846:       /*-----------------------------------------------------------------*/
 1847: #ifdef SUPPORT_UCP
 1848:       case OP_PROP_EXTRA + OP_TYPEEXACT:
 1849:       case OP_PROP_EXTRA + OP_TYPEUPTO:
 1850:       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
 1851:       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
 1852:       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
 1853:         { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
 1854:       count = current_state->count;  /* Number already matched */
 1855:       if (clen > 0)
 1856:         {
 1857:         BOOL OK;
 1858:         const pcre_uint32 *cp;
 1859:         const ucd_record * prop = GET_UCD(c);
 1860:         switch(code[1 + IMM2_SIZE + 1])
 1861:           {
 1862:           case PT_ANY:
 1863:           OK = TRUE;
 1864:           break;
 1865: 
 1866:           case PT_LAMP:
 1867:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
 1868:             prop->chartype == ucp_Lt;
 1869:           break;
 1870: 
 1871:           case PT_GC:
 1872:           OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
 1873:           break;
 1874: 
 1875:           case PT_PC:
 1876:           OK = prop->chartype == code[1 + IMM2_SIZE + 2];
 1877:           break;
 1878: 
 1879:           case PT_SC:
 1880:           OK = prop->script == code[1 + IMM2_SIZE + 2];
 1881:           break;
 1882: 
 1883:           /* These are specials for combination cases. */
 1884: 
 1885:           case PT_ALNUM:
 1886:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1887:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
 1888:           break;
 1889: 
 1890:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 1891:           which means that Perl space and POSIX space are now identical. PCRE
 1892:           was changed at release 8.34. */
 1893: 
 1894:           case PT_SPACE:    /* Perl space */
 1895:           case PT_PXSPACE:  /* POSIX space */
 1896:           switch(c)
 1897:             {
 1898:             HSPACE_CASES:
 1899:             VSPACE_CASES:
 1900:             OK = TRUE;
 1901:             break;
 1902: 
 1903:             default:
 1904:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
 1905:             break;
 1906:             }
 1907:           break;
 1908: 
 1909:           case PT_WORD:
 1910:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 1911:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
 1912:                c == CHAR_UNDERSCORE;
 1913:           break;
 1914: 
 1915:           case PT_CLIST:
 1916:           cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
 1917:           for (;;)
 1918:             {
 1919:             if (c < *cp) { OK = FALSE; break; }
 1920:             if (c == *cp++) { OK = TRUE; break; }
 1921:             }
 1922:           break;
 1923: 
 1924:           case PT_UCNC:
 1925:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 1926:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 1927:                c >= 0xe000;
 1928:           break;
 1929: 
 1930:           /* Should never occur, but keep compilers from grumbling. */
 1931: 
 1932:           default:
 1933:           OK = codevalue != OP_PROP;
 1934:           break;
 1935:           }
 1936: 
 1937:         if (OK == (d == OP_PROP))
 1938:           {
 1939:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
 1940:             {
 1941:             active_count--;           /* Remove non-match possibility */
 1942:             next_active_state--;
 1943:             }
 1944:           if (++count >= (int)GET2(code, 1))
 1945:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
 1946:           else
 1947:             { ADD_NEW(state_offset, count); }
 1948:           }
 1949:         }
 1950:       break;
 1951: 
 1952:       /*-----------------------------------------------------------------*/
 1953:       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
 1954:       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
 1955:       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
 1956:       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
 1957:       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
 1958:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
 1959:       count = current_state->count;  /* Number already matched */
 1960:       if (clen > 0)
 1961:         {
 1962:         int lgb, rgb;
 1963:         const pcre_uchar *nptr = ptr + clen;
 1964:         int ncount = 0;
 1965:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
 1966:           {
 1967:           active_count--;           /* Remove non-match possibility */
 1968:           next_active_state--;
 1969:           }
 1970:         lgb = UCD_GRAPHBREAK(c);
 1971:         while (nptr < end_subject)
 1972:           {
 1973:           dlen = 1;
 1974:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
 1975:           rgb = UCD_GRAPHBREAK(d);
 1976:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 1977:           ncount++;
 1978:           lgb = rgb;
 1979:           nptr += dlen;
 1980:           }
 1981:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
 1982:             reset_could_continue = TRUE;
 1983:         if (++count >= (int)GET2(code, 1))
 1984:           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
 1985:         else
 1986:           { ADD_NEW_DATA(-state_offset, count, ncount); }
 1987:         }
 1988:       break;
 1989: #endif
 1990: 
 1991:       /*-----------------------------------------------------------------*/
 1992:       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
 1993:       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
 1994:       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
 1995:       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
 1996:       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
 1997:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
 1998:       count = current_state->count;  /* Number already matched */
 1999:       if (clen > 0)
 2000:         {
 2001:         int ncount = 0;
 2002:         switch (c)
 2003:           {
 2004:           case CHAR_VT:
 2005:           case CHAR_FF:
 2006:           case CHAR_NEL:
 2007: #ifndef EBCDIC
 2008:           case 0x2028:
 2009:           case 0x2029:
 2010: #endif  /* Not EBCDIC */
 2011:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
 2012:           goto ANYNL03;
 2013: 
 2014:           case CHAR_CR:
 2015:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
 2016:           /* Fall through */
 2017: 
 2018:           ANYNL03:
 2019:           case CHAR_LF:
 2020:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
 2021:             {
 2022:             active_count--;           /* Remove non-match possibility */
 2023:             next_active_state--;
 2024:             }
 2025:           if (++count >= (int)GET2(code, 1))
 2026:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
 2027:           else
 2028:             { ADD_NEW_DATA(-state_offset, count, ncount); }
 2029:           break;
 2030: 
 2031:           default:
 2032:           break;
 2033:           }
 2034:         }
 2035:       break;
 2036: 
 2037:       /*-----------------------------------------------------------------*/
 2038:       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
 2039:       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
 2040:       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
 2041:       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
 2042:       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
 2043:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
 2044:       count = current_state->count;  /* Number already matched */
 2045:       if (clen > 0)
 2046:         {
 2047:         BOOL OK;
 2048:         switch (c)
 2049:           {
 2050:           VSPACE_CASES:
 2051:           OK = TRUE;
 2052:           break;
 2053: 
 2054:           default:
 2055:           OK = FALSE;
 2056:           }
 2057: 
 2058:         if (OK == (d == OP_VSPACE))
 2059:           {
 2060:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
 2061:             {
 2062:             active_count--;           /* Remove non-match possibility */
 2063:             next_active_state--;
 2064:             }
 2065:           if (++count >= (int)GET2(code, 1))
 2066:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
 2067:           else
 2068:             { ADD_NEW_DATA(-state_offset, count, 0); }
 2069:           }
 2070:         }
 2071:       break;
 2072: 
 2073:       /*-----------------------------------------------------------------*/
 2074:       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
 2075:       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
 2076:       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
 2077:       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
 2078:       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
 2079:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
 2080:       count = current_state->count;  /* Number already matched */
 2081:       if (clen > 0)
 2082:         {
 2083:         BOOL OK;
 2084:         switch (c)
 2085:           {
 2086:           HSPACE_CASES:
 2087:           OK = TRUE;
 2088:           break;
 2089: 
 2090:           default:
 2091:           OK = FALSE;
 2092:           break;
 2093:           }
 2094: 
 2095:         if (OK == (d == OP_HSPACE))
 2096:           {
 2097:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
 2098:             {
 2099:             active_count--;           /* Remove non-match possibility */
 2100:             next_active_state--;
 2101:             }
 2102:           if (++count >= (int)GET2(code, 1))
 2103:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
 2104:           else
 2105:             { ADD_NEW_DATA(-state_offset, count, 0); }
 2106:           }
 2107:         }
 2108:       break;
 2109: 
 2110: /* ========================================================================== */
 2111:       /* These opcodes are followed by a character that is usually compared
 2112:       to the current subject character; it is loaded into d. We still get
 2113:       here even if there is no subject character, because in some cases zero
 2114:       repetitions are permitted. */
 2115: 
 2116:       /*-----------------------------------------------------------------*/
 2117:       case OP_CHAR:
 2118:       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
 2119:       break;
 2120: 
 2121:       /*-----------------------------------------------------------------*/
 2122:       case OP_CHARI:
 2123:       if (clen == 0) break;
 2124: 
 2125: #ifdef SUPPORT_UTF
 2126:       if (utf)
 2127:         {
 2128:         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
 2129:           {
 2130:           unsigned int othercase;
 2131:           if (c < 128)
 2132:             othercase = fcc[c];
 2133:           else
 2134:             /* If we have Unicode property support, we can use it to test the
 2135:             other case of the character. */
 2136: #ifdef SUPPORT_UCP
 2137:             othercase = UCD_OTHERCASE(c);
 2138: #else
 2139:             othercase = NOTACHAR;
 2140: #endif
 2141: 
 2142:           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
 2143:           }
 2144:         }
 2145:       else
 2146: #endif  /* SUPPORT_UTF */
 2147:       /* Not UTF mode */
 2148:         {
 2149:         if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
 2150:           { ADD_NEW(state_offset + 2, 0); }
 2151:         }
 2152:       break;
 2153: 
 2154: 
 2155: #ifdef SUPPORT_UCP
 2156:       /*-----------------------------------------------------------------*/
 2157:       /* This is a tricky one because it can match more than one character.
 2158:       Find out how many characters to skip, and then set up a negative state
 2159:       to wait for them to pass before continuing. */
 2160: 
 2161:       case OP_EXTUNI:
 2162:       if (clen > 0)
 2163:         {
 2164:         int lgb, rgb;
 2165:         const pcre_uchar *nptr = ptr + clen;
 2166:         int ncount = 0;
 2167:         lgb = UCD_GRAPHBREAK(c);
 2168:         while (nptr < end_subject)
 2169:           {
 2170:           dlen = 1;
 2171:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
 2172:           rgb = UCD_GRAPHBREAK(d);
 2173:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 2174:           ncount++;
 2175:           lgb = rgb;
 2176:           nptr += dlen;
 2177:           }
 2178:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
 2179:             reset_could_continue = TRUE;
 2180:         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
 2181:         }
 2182:       break;
 2183: #endif
 2184: 
 2185:       /*-----------------------------------------------------------------*/
 2186:       /* This is tricky like EXTUNI because it too can match more than one
 2187:       character (when CR is followed by LF). In this case, set up a negative
 2188:       state to wait for one character to pass before continuing. */
 2189: 
 2190:       case OP_ANYNL:
 2191:       if (clen > 0) switch(c)
 2192:         {
 2193:         case CHAR_VT:
 2194:         case CHAR_FF:
 2195:         case CHAR_NEL:
 2196: #ifndef EBCDIC
 2197:         case 0x2028:
 2198:         case 0x2029:
 2199: #endif  /* Not EBCDIC */
 2200:         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
 2201:         /* Fall through */
 2202:         case CHAR_LF:
 2203:         ADD_NEW(state_offset + 1, 0);
 2204:         break;
 2205: 
 2206:         case CHAR_CR:
 2207:         if (ptr + 1 >= end_subject)        /* CR is the last subject character */
 2208:           {
 2209:           ADD_NEW(state_offset + 1, 0);
 2210:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
 2211:             reset_could_continue = TRUE;
 2212:           }
 2213:         else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
 2214:           {
 2215:           ADD_NEW_DATA(-(state_offset + 1), 0, 1);   /* CRLF: wait for the LF to pass */
 2216:           }
 2217:         else
 2218:           {
 2219:           ADD_NEW(state_offset + 1, 0);
 2220:           }
 2221:         break;
 2222:         }
 2223:       break;
 2224: 
 2225:       /*-----------------------------------------------------------------*/
 2226:       case OP_NOT_VSPACE:
 2227:       if (clen > 0) switch(c)
 2228:         {
 2229:         VSPACE_CASES:
 2230:         break;
 2231: 
 2232:         default:
 2233:         ADD_NEW(state_offset + 1, 0);
 2234:         break;
 2235:         }
 2236:       break;
 2237: 
 2238:       /*-----------------------------------------------------------------*/
 2239:       case OP_VSPACE:
 2240:       if (clen > 0) switch(c)
 2241:         {
 2242:         VSPACE_CASES:
 2243:         ADD_NEW(state_offset + 1, 0);
 2244:         break;
 2245: 
 2246:         default:
 2247:         break;
 2248:         }
 2249:       break;
 2250: 
 2251:       /*-----------------------------------------------------------------*/
 2252:       case OP_NOT_HSPACE:
 2253:       if (clen > 0) switch(c)
 2254:         {
 2255:         HSPACE_CASES:
 2256:         break;
 2257: 
 2258:         default:
 2259:         ADD_NEW(state_offset + 1, 0);
 2260:         break;
 2261:         }
 2262:       break;
 2263: 
 2264:       /*-----------------------------------------------------------------*/
 2265:       case OP_HSPACE:
 2266:       if (clen > 0) switch(c)
 2267:         {
 2268:         HSPACE_CASES:
 2269:         ADD_NEW(state_offset + 1, 0);
 2270:         break;
 2271: 
 2272:         default:
 2273:         break;
 2274:         }
 2275:       break;
 2276: 
 2277:       /*-----------------------------------------------------------------*/
 2278:       /* Match a negated single character casefully. */
 2279: 
 2280:       case OP_NOT:
 2281:       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
 2282:       break;
 2283: 
 2284:       /*-----------------------------------------------------------------*/
 2285:       /* Match a negated single character caselessly. */
 2286: 
 2287:       case OP_NOTI:
 2288:       if (clen > 0)
 2289:         {
 2290:         unsigned int otherd = d;   /* Default when no other case can be looked up */
 2291: #ifdef SUPPORT_UTF
 2292:         if (utf && d >= 128)
 2293:           {
 2294: #ifdef SUPPORT_UCP
 2295:           otherd = UCD_OTHERCASE(d);
 2296: #endif  /* SUPPORT_UCP; without it otherd keeps its default of d */
 2297:           }
 2298:         else
 2299: #endif  /* SUPPORT_UTF */
 2300:         otherd = TABLE_GET(d, fcc, d);
 2301:         if (c != d && c != otherd)   /* c is neither d nor d's other case */
 2302:           { ADD_NEW(state_offset + dlen + 1, 0); }
 2303:         }
 2304:       break;
 2305: 
 2306:       /*-----------------------------------------------------------------*/
 2307:       case OP_PLUSI:
 2308:       case OP_MINPLUSI:
 2309:       case OP_POSPLUSI:
 2310:       case OP_NOTPLUSI:
 2311:       case OP_NOTMINPLUSI:
 2312:       case OP_NOTPOSPLUSI:
 2313:       caseless = TRUE;
 2314:       codevalue -= OP_STARI - OP_STAR;
 2315: 
 2316:       /* Fall through */
 2317:       case OP_PLUS:
 2318:       case OP_MINPLUS:
 2319:       case OP_POSPLUS:
 2320:       case OP_NOTPLUS:
 2321:       case OP_NOTMINPLUS:
 2322:       case OP_NOTPOSPLUS:
 2323:       count = current_state->count;  /* Already matched */
 2324:       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
 2325:       if (clen > 0)
 2326:         {
 2327:         pcre_uint32 otherd = NOTACHAR;
 2328:         if (caseless)
 2329:           {
 2330: #ifdef SUPPORT_UTF
 2331:           if (utf && d >= 128)
 2332:             {
 2333: #ifdef SUPPORT_UCP
 2334:             otherd = UCD_OTHERCASE(d);
 2335: #endif  /* SUPPORT_UCP */
 2336:             }
 2337:           else
 2338: #endif  /* SUPPORT_UTF */
 2339:           otherd = TABLE_GET(d, fcc, d);
 2340:           }
 2341:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
 2342:           {
 2343:           if (count > 0 &&
 2344:               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
 2345:             {
 2346:             active_count--;             /* Remove non-match possibility */
 2347:             next_active_state--;
 2348:             }
 2349:           count++;
 2350:           ADD_NEW(state_offset, count);
 2351:           }
 2352:         }
 2353:       break;
 2354: 
 2355:       /*-----------------------------------------------------------------*/
 2356:       case OP_QUERYI:
 2357:       case OP_MINQUERYI:
 2358:       case OP_POSQUERYI:
 2359:       case OP_NOTQUERYI:
 2360:       case OP_NOTMINQUERYI:
 2361:       case OP_NOTPOSQUERYI:
 2362:       caseless = TRUE;
 2363:       codevalue -= OP_STARI - OP_STAR;
 2364:       /* Fall through */
 2365:       case OP_QUERY:
 2366:       case OP_MINQUERY:
 2367:       case OP_POSQUERY:
 2368:       case OP_NOTQUERY:
 2369:       case OP_NOTMINQUERY:
 2370:       case OP_NOTPOSQUERY:
 2371:       ADD_ACTIVE(state_offset + dlen + 1, 0);
 2372:       if (clen > 0)
 2373:         {
 2374:         pcre_uint32 otherd = NOTACHAR;
 2375:         if (caseless)
 2376:           {
 2377: #ifdef SUPPORT_UTF
 2378:           if (utf && d >= 128)
 2379:             {
 2380: #ifdef SUPPORT_UCP
 2381:             otherd = UCD_OTHERCASE(d);
 2382: #endif  /* SUPPORT_UCP */
 2383:             }
 2384:           else
 2385: #endif  /* SUPPORT_UTF */
 2386:           otherd = TABLE_GET(d, fcc, d);
 2387:           }
 2388:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
 2389:           {
 2390:           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
 2391:             {
 2392:             active_count--;            /* Remove non-match possibility */
 2393:             next_active_state--;
 2394:             }
 2395:           ADD_NEW(state_offset + dlen + 1, 0);
 2396:           }
 2397:         }
 2398:       break;
 2399: 
 2400:       /*-----------------------------------------------------------------*/
 2401:       case OP_STARI:
 2402:       case OP_MINSTARI:
 2403:       case OP_POSSTARI:
 2404:       case OP_NOTSTARI:
 2405:       case OP_NOTMINSTARI:
 2406:       case OP_NOTPOSSTARI:
 2407:       caseless = TRUE;
 2408:       codevalue -= OP_STARI - OP_STAR;
 2409:       /* Fall through */
 2410:       case OP_STAR:
 2411:       case OP_MINSTAR:
 2412:       case OP_POSSTAR:
 2413:       case OP_NOTSTAR:
 2414:       case OP_NOTMINSTAR:
 2415:       case OP_NOTPOSSTAR:
 2416:       ADD_ACTIVE(state_offset + dlen + 1, 0);
 2417:       if (clen > 0)
 2418:         {
 2419:         pcre_uint32 otherd = NOTACHAR;
 2420:         if (caseless)
 2421:           {
 2422: #ifdef SUPPORT_UTF
 2423:           if (utf && d >= 128)
 2424:             {
 2425: #ifdef SUPPORT_UCP
 2426:             otherd = UCD_OTHERCASE(d);
 2427: #endif  /* SUPPORT_UCP */
 2428:             }
 2429:           else
 2430: #endif  /* SUPPORT_UTF */
 2431:           otherd = TABLE_GET(d, fcc, d);
 2432:           }
 2433:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
 2434:           {
 2435:           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
 2436:             {
 2437:             active_count--;            /* Remove non-match possibility */
 2438:             next_active_state--;
 2439:             }
 2440:           ADD_NEW(state_offset, 0);
 2441:           }
 2442:         }
 2443:       break;
 2444: 
 2445:       /*-----------------------------------------------------------------*/
 2446:       case OP_EXACTI:
 2447:       case OP_NOTEXACTI:
 2448:       caseless = TRUE;
 2449:       codevalue -= OP_STARI - OP_STAR;
 2450:       /* Fall through */
 2451:       case OP_EXACT:
 2452:       case OP_NOTEXACT:
 2453:       count = current_state->count;  /* Number already matched */
 2454:       if (clen > 0)
 2455:         {
 2456:         pcre_uint32 otherd = NOTACHAR;
 2457:         if (caseless)
 2458:           {
 2459: #ifdef SUPPORT_UTF
 2460:           if (utf && d >= 128)
 2461:             {
 2462: #ifdef SUPPORT_UCP
 2463:             otherd = UCD_OTHERCASE(d);
 2464: #endif  /* SUPPORT_UCP */
 2465:             }
 2466:           else
 2467: #endif  /* SUPPORT_UTF */
 2468:           otherd = TABLE_GET(d, fcc, d);
 2469:           }
 2470:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
 2471:           {
 2472:           if (++count >= (int)GET2(code, 1))
 2473:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
 2474:           else
 2475:             { ADD_NEW(state_offset, count); }
 2476:           }
 2477:         }
 2478:       break;
 2479: 
 2480:       /*-----------------------------------------------------------------*/
 2481:       case OP_UPTOI:
 2482:       case OP_MINUPTOI:
 2483:       case OP_POSUPTOI:
 2484:       case OP_NOTUPTOI:
 2485:       case OP_NOTMINUPTOI:
 2486:       case OP_NOTPOSUPTOI:
 2487:       caseless = TRUE;
 2488:       codevalue -= OP_STARI - OP_STAR;
 2489:       /* Fall through */
 2490:       case OP_UPTO:
 2491:       case OP_MINUPTO:
 2492:       case OP_POSUPTO:
 2493:       case OP_NOTUPTO:
 2494:       case OP_NOTMINUPTO:
 2495:       case OP_NOTPOSUPTO:
 2496:       ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
 2497:       count = current_state->count;  /* Number already matched */
 2498:       if (clen > 0)
 2499:         {
 2500:         pcre_uint32 otherd = NOTACHAR;
 2501:         if (caseless)
 2502:           {
 2503: #ifdef SUPPORT_UTF
 2504:           if (utf && d >= 128)
 2505:             {
 2506: #ifdef SUPPORT_UCP
 2507:             otherd = UCD_OTHERCASE(d);
 2508: #endif  /* SUPPORT_UCP */
 2509:             }
 2510:           else
 2511: #endif  /* SUPPORT_UTF */
 2512:           otherd = TABLE_GET(d, fcc, d);
 2513:           }
 2514:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
 2515:           {
 2516:           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
 2517:             {
 2518:             active_count--;             /* Remove non-match possibility */
 2519:             next_active_state--;
 2520:             }
 2521:           if (++count >= (int)GET2(code, 1))
 2522:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
 2523:           else
 2524:             { ADD_NEW(state_offset, count); }
 2525:           }
 2526:         }
 2527:       break;
 2528: 
 2529: 
 2530: /* ========================================================================== */
 2531:       /* These are the class-handling opcodes */
 2532: 
 2533:       case OP_CLASS:
 2534:       case OP_NCLASS:
 2535:       case OP_XCLASS:
 2536:         {
 2537:         BOOL isinclass = FALSE;
 2538:         int next_state_offset;
 2539:         const pcre_uchar *ecode;
 2540: 
 2541:         /* For a simple class, there is always just a 32-byte table, and we
 2542:         can set isinclass from it. */
 2543: 
 2544:         if (codevalue != OP_XCLASS)
 2545:           {
 2546:           ecode = code + 1 + (32 / sizeof(pcre_uchar));
 2547:           if (clen > 0)
 2548:             {
 2549:             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
 2550:               ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
 2551:             }
 2552:           }
 2553: 
 2554:         /* An extended class may have a table or a list of single characters,
 2555:         ranges, or both, and it may be positive or negative. There's a
 2556:         function that sorts all this out. */
 2557: 
 2558:         else
 2559:          {
 2560:          ecode = code + GET(code, 1);
 2561:          if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
 2562:          }
 2563: 
 2564:         /* At this point, isinclass is set for all kinds of class, and ecode
 2565:         points to the byte after the end of the class. If there is a
 2566:         quantifier, this is where it will be. */
 2567: 
 2568:         next_state_offset = (int)(ecode - start_code);
 2569: 
 2570:         switch (*ecode)
 2571:           {
 2572:           case OP_CRSTAR:
 2573:           case OP_CRMINSTAR:
 2574:           case OP_CRPOSSTAR:
 2575:           ADD_ACTIVE(next_state_offset + 1, 0);
 2576:           if (isinclass)
 2577:             {
 2578:             if (*ecode == OP_CRPOSSTAR)
 2579:               {
 2580:               active_count--;           /* Remove non-match possibility */
 2581:               next_active_state--;
 2582:               }
 2583:             ADD_NEW(state_offset, 0);
 2584:             }
 2585:           break;
 2586: 
 2587:           case OP_CRPLUS:
 2588:           case OP_CRMINPLUS:
 2589:           case OP_CRPOSPLUS:
 2590:           count = current_state->count;  /* Already matched */
 2591:           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
 2592:           if (isinclass)
 2593:             {
 2594:             if (count > 0 && *ecode == OP_CRPOSPLUS)
 2595:               {
 2596:               active_count--;           /* Remove non-match possibility */
 2597:               next_active_state--;
 2598:               }
 2599:             count++;
 2600:             ADD_NEW(state_offset, count);
 2601:             }
 2602:           break;
 2603: 
 2604:           case OP_CRQUERY:
 2605:           case OP_CRMINQUERY:
 2606:           case OP_CRPOSQUERY:
 2607:           ADD_ACTIVE(next_state_offset + 1, 0);
 2608:           if (isinclass)
 2609:             {
 2610:             if (*ecode == OP_CRPOSQUERY)
 2611:               {
 2612:               active_count--;           /* Remove non-match possibility */
 2613:               next_active_state--;
 2614:               }
 2615:             ADD_NEW(next_state_offset + 1, 0);
 2616:             }
 2617:           break;
 2618: 
 2619:           case OP_CRRANGE:
 2620:           case OP_CRMINRANGE:
 2621:           case OP_CRPOSRANGE:
 2622:           count = current_state->count;  /* Already matched */
 2623:           if (count >= (int)GET2(ecode, 1))
 2624:             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
 2625:           if (isinclass)
 2626:             {
 2627:             int max = (int)GET2(ecode, 1 + IMM2_SIZE);
 2628:             if (*ecode == OP_CRPOSRANGE)
 2629:               {
 2630:               active_count--;           /* Remove non-match possibility */
 2631:               next_active_state--;
 2632:               }
 2633:             if (++count >= max && max != 0)   /* Max 0 => no limit */
 2634:               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
 2635:             else
 2636:               { ADD_NEW(state_offset, count); }
 2637:             }
 2638:           break;
 2639: 
 2640:           default:
 2641:           if (isinclass) { ADD_NEW(next_state_offset, 0); }
 2642:           break;
 2643:           }
 2644:         }
 2645:       break;
 2646: 
 2647: /* ========================================================================== */
 2648:       /* These are the opcodes for fancy brackets of various kinds. We have
 2649:       to use recursion in order to handle them. The "always failing" assertion
 2650:       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
 2651:       though the other "backtracking verbs" are not supported. */
 2652: 
 2653:       case OP_FAIL:
 2654:       forced_fail++;    /* Count FAILs for multiple states */
 2655:       break;
 2656: 
 2657:       case OP_ASSERT:
 2658:       case OP_ASSERT_NOT:
 2659:       case OP_ASSERTBACK:
 2660:       case OP_ASSERTBACK_NOT:
 2661:         {
 2662:         int rc;
 2663:         int local_offsets[2];
 2664:         int local_workspace[1000];
 2665:         const pcre_uchar *endasscode = code + GET(code, 1);
 2666: 
 2667:         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
 2668: 
 2669:         rc = internal_dfa_exec(
 2670:           md,                                   /* static match data */
 2671:           code,                                 /* this subexpression's code */
 2672:           ptr,                                  /* where we currently are */
 2673:           (int)(ptr - start_subject),           /* start offset */
 2674:           local_offsets,                        /* offset vector */
 2675:           sizeof(local_offsets)/sizeof(int),    /* size of same */
 2676:           local_workspace,                      /* workspace vector */
 2677:           sizeof(local_workspace)/sizeof(int),  /* size of same */
 2678:           rlevel);                              /* function recursion level */
 2679: 
 2680:         if (rc == PCRE_ERROR_DFA_UITEM) return rc;
 2681:         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
 2682:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
 2683:         }
 2684:       break;
 2685: 
 2686:       /*-----------------------------------------------------------------*/
 2687:       case OP_COND:
 2688:       case OP_SCOND:
 2689:         {
 2690:         int local_offsets[1000];
 2691:         int local_workspace[1000];
 2692:         int codelink = GET(code, 1);
 2693:         int condcode;
 2694: 
 2695:         /* Because of the way auto-callout works during compile, a callout item
 2696:         is inserted between OP_COND and an assertion condition. This does not
 2697:         happen for the other conditions. */
 2698: 
 2699:         if (code[LINK_SIZE+1] == OP_CALLOUT)
 2700:           {
 2701:           rrc = 0;
 2702:           if (PUBL(callout) != NULL)
 2703:             {
 2704:             PUBL(callout_block) cb;
 2705:             cb.version          = 1;   /* Version 1 of the callout block */
 2706:             cb.callout_number   = code[LINK_SIZE+2];
 2707:             cb.offset_vector    = offsets;
 2708: #if defined COMPILE_PCRE8
 2709:             cb.subject          = (PCRE_SPTR)start_subject;
 2710: #elif defined COMPILE_PCRE16
 2711:             cb.subject          = (PCRE_SPTR16)start_subject;
 2712: #elif defined COMPILE_PCRE32
 2713:             cb.subject          = (PCRE_SPTR32)start_subject;
 2714: #endif
 2715:             cb.subject_length   = (int)(end_subject - start_subject);
 2716:             cb.start_match      = (int)(current_subject - start_subject);
 2717:             cb.current_position = (int)(ptr - start_subject);
 2718:             cb.pattern_position = GET(code, LINK_SIZE + 3);
 2719:             cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
 2720:             cb.capture_top      = 1;
 2721:             cb.capture_last     = -1;
 2722:             cb.callout_data     = md->callout_data;
 2723:             cb.mark             = NULL;   /* No (*MARK) support */
 2724:             if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
 2725:             }
 2726:           if (rrc > 0) break;                      /* Fail this thread */
 2727:           code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
 2728:           }
 2729: 
 2730:         condcode = code[LINK_SIZE+1];
 2731: 
 2732:         /* Back reference conditions and duplicate named recursion conditions
 2733:         are not supported */
 2734: 
 2735:         if (condcode == OP_CREF || condcode == OP_DNCREF ||
 2736:             condcode == OP_DNRREF)
 2737:           return PCRE_ERROR_DFA_UCOND;
 2738: 
 2739:         /* The DEFINE condition is always false */
 2740: 
 2741:         if (condcode == OP_DEF)
 2742:           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
 2743: 
 2744:         /* The only supported version of OP_RREF is for the value RREF_ANY,
 2745:         which means "test if in any recursion". We can't test for specifically
 2746:         recursed groups. */
 2747: 
 2748:         else if (condcode == OP_RREF)
 2749:           {
 2750:           int value = GET2(code, LINK_SIZE + 2);
 2751:           if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
 2752:           if (md->recursive != NULL)
 2753:             { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
 2754:           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
 2755:           }
 2756: 
 2757:         /* Otherwise, the condition is an assertion */
 2758: 
 2759:         else
 2760:           {
 2761:           int rc;
 2762:           const pcre_uchar *asscode = code + LINK_SIZE + 1;
 2763:           const pcre_uchar *endasscode = asscode + GET(asscode, 1);
 2764: 
 2765:           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
 2766: 
 2767:           rc = internal_dfa_exec(
 2768:             md,                                   /* fixed match data */
 2769:             asscode,                              /* this subexpression's code */
 2770:             ptr,                                  /* where we currently are */
 2771:             (int)(ptr - start_subject),           /* start offset */
 2772:             local_offsets,                        /* offset vector */
 2773:             sizeof(local_offsets)/sizeof(int),    /* size of same */
 2774:             local_workspace,                      /* workspace vector */
 2775:             sizeof(local_workspace)/sizeof(int),  /* size of same */
 2776:             rlevel);                              /* function recursion level */
 2777: 
 2778:           if (rc == PCRE_ERROR_DFA_UITEM) return rc;
 2779:           if ((rc >= 0) ==
 2780:                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
 2781:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
 2782:           else
 2783:             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
 2784:           }
 2785:         }
 2786:       break;
 2787: 
 2788:       /*-----------------------------------------------------------------*/
 2789:       case OP_RECURSE:
 2790:         {
 2791:         dfa_recursion_info *ri;
 2792:         int local_offsets[1000];
 2793:         int local_workspace[1000];
 2794:         const pcre_uchar *callpat = start_code + GET(code, 1);
 2795:         int recno = (callpat == md->start_code)? 0 :
 2796:           GET2(callpat, 1 + LINK_SIZE);
 2797:         int rc;
 2798: 
 2799:         DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
 2800: 
 2801:         /* Check for repeating a recursion without advancing the subject
 2802:         pointer. This should catch convoluted mutual recursions. (Some simple
 2803:         cases are caught at compile time.) */
 2804: 
 2805:         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
 2806:           if (recno == ri->group_num && ptr == ri->subject_position)
 2807:             return PCRE_ERROR_RECURSELOOP;
 2808: 
 2809:         /* Remember this recursion and where we started it so as to
 2810:         catch infinite loops. */
 2811: 
 2812:         new_recursive.group_num = recno;
 2813:         new_recursive.subject_position = ptr;
 2814:         new_recursive.prevrec = md->recursive;
 2815:         md->recursive = &new_recursive;
 2816: 
 2817:         rc = internal_dfa_exec(
 2818:           md,                                   /* fixed match data */
 2819:           callpat,                              /* this subexpression's code */
 2820:           ptr,                                  /* where we currently are */
 2821:           (int)(ptr - start_subject),           /* start offset */
 2822:           local_offsets,                        /* offset vector */
 2823:           sizeof(local_offsets)/sizeof(int),    /* size of same */
 2824:           local_workspace,                      /* workspace vector */
 2825:           sizeof(local_workspace)/sizeof(int),  /* size of same */
 2826:           rlevel);                              /* function recursion level */
 2827: 
 2828:         md->recursive = new_recursive.prevrec;  /* Done this recursion */
 2829: 
 2830:         DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
 2831:           rc));
 2832: 
 2833:         /* Ran out of internal offsets */
 2834: 
 2835:         if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
 2836: 
 2837:         /* For each successful matched substring, set up the next state with a
 2838:         count of characters to skip before trying it. Note that the count is in
 2839:         characters, not bytes. */
 2840: 
 2841:         if (rc > 0)
 2842:           {
 2843:           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
 2844:             {
 2845:             int charcount = local_offsets[rc+1] - local_offsets[rc];
 2846: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 2847:             if (utf)
 2848:               {
 2849:               const pcre_uchar *p = start_subject + local_offsets[rc];
 2850:               const pcre_uchar *pp = start_subject + local_offsets[rc+1];
 2851:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 2852:               }
 2853: #endif
 2854:             if (charcount > 0)
 2855:               {
 2856:               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
 2857:               }
 2858:             else
 2859:               {
 2860:               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
 2861:               }
 2862:             }
 2863:           }
 2864:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
 2865:         }
 2866:       break;
 2867: 
 2868:       /*-----------------------------------------------------------------*/
 2869:       case OP_BRAPOS:
 2870:       case OP_SBRAPOS:
 2871:       case OP_CBRAPOS:
 2872:       case OP_SCBRAPOS:
 2873:       case OP_BRAPOSZERO:
 2874:         {
 2875:         int charcount, matched_count;
 2876:         const pcre_uchar *local_ptr = ptr;
 2877:         BOOL allow_zero;
 2878: 
 2879:         if (codevalue == OP_BRAPOSZERO)
 2880:           {
 2881:           allow_zero = TRUE;
 2882:           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
 2883:           }
 2884:         else allow_zero = FALSE;
 2885: 
 2886:         /* Loop to match the subpattern as many times as possible as if it were
 2887:         a complete pattern. */
 2888: 
 2889:         for (matched_count = 0;; matched_count++)
 2890:           {
 2891:           int local_offsets[2];
 2892:           int local_workspace[1000];
 2893: 
 2894:           int rc = internal_dfa_exec(
 2895:             md,                                   /* fixed match data */
 2896:             code,                                 /* this subexpression's code */
 2897:             local_ptr,                            /* where we currently are */
 2898:             (int)(ptr - start_subject),           /* start offset */
 2899:             local_offsets,                        /* offset vector */
 2900:             sizeof(local_offsets)/sizeof(int),    /* size of same */
 2901:             local_workspace,                      /* workspace vector */
 2902:             sizeof(local_workspace)/sizeof(int),  /* size of same */
 2903:             rlevel);                              /* function recursion level */
 2904: 
 2905:           /* Failed to match */
 2906: 
 2907:           if (rc < 0)
 2908:             {
 2909:             if (rc != PCRE_ERROR_NOMATCH) return rc;
 2910:             break;
 2911:             }
 2912: 
 2913:           /* Matched: break the loop if zero characters matched. */
 2914: 
 2915:           charcount = local_offsets[1] - local_offsets[0];
 2916:           if (charcount == 0) break;
 2917:           local_ptr += charcount;    /* Advance temporary position ptr */
 2918:           }
 2919: 
 2920:         /* At this point we have matched the subpattern matched_count
 2921:         times, and local_ptr is pointing to the character after the end of the
 2922:         last match. */
 2923: 
 2924:         if (matched_count > 0 || allow_zero)
 2925:           {
 2926:           const pcre_uchar *end_subpattern = code;
 2927:           int next_state_offset;
 2928: 
 2929:           do { end_subpattern += GET(end_subpattern, 1); }
 2930:             while (*end_subpattern == OP_ALT);
 2931:           next_state_offset =
 2932:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
 2933: 
 2934:           /* Optimization: if there are no more active states, and there
 2935:           are no new states yet set up, then skip over the subject string
 2936:           right here, to save looping. Otherwise, set up the new state to swing
 2937:           into action when the end of the matched substring is reached. */
 2938: 
 2939:           if (i + 1 >= active_count && new_count == 0)
 2940:             {
 2941:             ptr = local_ptr;
 2942:             clen = 0;
 2943:             ADD_NEW(next_state_offset, 0);
 2944:             }
 2945:           else
 2946:             {
 2947:             const pcre_uchar *p = ptr;
 2948:             const pcre_uchar *pp = local_ptr;
 2949:             charcount = (int)(pp - p);
 2950: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 2951:             if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 2952: #endif
 2953:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
 2954:             }
 2955:           }
 2956:         }
 2957:       break;
 2958: 
 2959:       /*-----------------------------------------------------------------*/
 2960:       case OP_ONCE:
 2961:       case OP_ONCE_NC:
 2962:         {
 2963:         int local_offsets[2];
 2964:         int local_workspace[1000];
 2965: 
 2966:         int rc = internal_dfa_exec(
 2967:           md,                                   /* fixed match data */
 2968:           code,                                 /* this subexpression's code */
 2969:           ptr,                                  /* where we currently are */
 2970:           (int)(ptr - start_subject),           /* start offset */
 2971:           local_offsets,                        /* offset vector */
 2972:           sizeof(local_offsets)/sizeof(int),    /* size of same */
 2973:           local_workspace,                      /* workspace vector */
 2974:           sizeof(local_workspace)/sizeof(int),  /* size of same */
 2975:           rlevel);                              /* function recursion level */
 2976: 
 2977:         if (rc >= 0)
 2978:           {
 2979:           const pcre_uchar *end_subpattern = code;
 2980:           int charcount = local_offsets[1] - local_offsets[0];
 2981:           int next_state_offset, repeat_state_offset;
 2982: 
 2983:           do { end_subpattern += GET(end_subpattern, 1); }
 2984:             while (*end_subpattern == OP_ALT);
 2985:           next_state_offset =
 2986:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
 2987: 
 2988:           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
 2989:           arrange for the repeat state also to be added to the relevant list.
 2990:           Calculate the offset, or set -1 for no repeat. */
 2991: 
 2992:           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
 2993:                                  *end_subpattern == OP_KETRMIN)?
 2994:             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
 2995: 
 2996:           /* If we have matched an empty string, add the next state at the
 2997:           current character pointer. This is important so that the duplicate
 2998:           checking kicks in, which is what breaks infinite loops that match an
 2999:           empty string. */
 3000: 
 3001:           if (charcount == 0)
 3002:             {
 3003:             ADD_ACTIVE(next_state_offset, 0);
 3004:             }
 3005: 
 3006:           /* Optimization: if there are no more active states, and there
 3007:           are no new states yet set up, then skip over the subject string
 3008:           right here, to save looping. Otherwise, set up the new state to swing
 3009:           into action when the end of the matched substring is reached. */
 3010: 
 3011:           else if (i + 1 >= active_count && new_count == 0)
 3012:             {
 3013:             ptr += charcount;
 3014:             clen = 0;
 3015:             ADD_NEW(next_state_offset, 0);
 3016: 
 3017:             /* If we are adding a repeat state at the new character position,
 3018:             we must fudge things so that it is the only current state.
 3019:             Otherwise, it might be a duplicate of one we processed before, and
 3020:             that would cause it to be skipped. */
 3021: 
 3022:             if (repeat_state_offset >= 0)
 3023:               {
 3024:               next_active_state = active_states;
 3025:               active_count = 0;
 3026:               i = -1;
 3027:               ADD_ACTIVE(repeat_state_offset, 0);
 3028:               }
 3029:             }
 3030:           else
 3031:             {
 3032: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
 3033:             if (utf)
 3034:               {
 3035:               const pcre_uchar *p = start_subject + local_offsets[0];
 3036:               const pcre_uchar *pp = start_subject + local_offsets[1];
 3037:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 3038:               }
 3039: #endif
 3040:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
 3041:             if (repeat_state_offset >= 0)
 3042:               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
 3043:             }
 3044:           }
 3045:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
 3046:         }
 3047:       break;
 3048: 
 3049: 
 3050: /* ========================================================================== */
 3051:       /* Handle callouts */
 3052: 
 3053:       case OP_CALLOUT:
 3054:       rrc = 0;
 3055:       if (PUBL(callout) != NULL)
 3056:         {
 3057:         PUBL(callout_block) cb;
 3058:         cb.version          = 1;   /* Version 1 of the callout block */
 3059:         cb.callout_number   = code[1];
 3060:         cb.offset_vector    = offsets;
 3061: #if defined COMPILE_PCRE8
 3062:         cb.subject          = (PCRE_SPTR)start_subject;
 3063: #elif defined COMPILE_PCRE16
 3064:         cb.subject          = (PCRE_SPTR16)start_subject;
 3065: #elif defined COMPILE_PCRE32
 3066:         cb.subject          = (PCRE_SPTR32)start_subject;
 3067: #endif
 3068:         cb.subject_length   = (int)(end_subject - start_subject);
 3069:         cb.start_match      = (int)(current_subject - start_subject);
 3070:         cb.current_position = (int)(ptr - start_subject);
 3071:         cb.pattern_position = GET(code, 2);
 3072:         cb.next_item_length = GET(code, 2 + LINK_SIZE);
 3073:         cb.capture_top      = 1;
 3074:         cb.capture_last     = -1;
 3075:         cb.callout_data     = md->callout_data;
 3076:         cb.mark             = NULL;   /* No (*MARK) support */
 3077:         if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
 3078:         }
 3079:       if (rrc == 0)
 3080:         { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
 3081:       break;
 3082: 
 3083: 
 3084: /* ========================================================================== */
 3085:       default:        /* Unsupported opcode */
 3086:       return PCRE_ERROR_DFA_UITEM;
 3087:       }
 3088: 
 3089:     NEXT_ACTIVE_STATE: continue;
 3090: 
 3091:     }      /* End of loop scanning active states */
 3092: 
 3093:   /* We have finished the processing at the current subject character. If no
 3094:   new states have been set for the next character, we have found all the
 3095:   matches that we are going to find. If we are at the top level and partial
 3096:   matching has been requested, check for appropriate conditions.
 3097: 
 3098:   The "forced_fail" variable counts the number of (*F) encountered for the
 3099:   character. If it is equal to the original active_count (saved in
 3100:   workspace[1]) it means that (*F) was found on every active state. In this
 3101:   case we don't want to give a partial match.
 3102: 
 3103:   The "could_continue" variable is true if a state could have continued but
 3104:   for the fact that the end of the subject was reached. */
 3105: 
 3106:   if (new_count <= 0)
 3107:     {
 3108:     if (rlevel == 1 &&                               /* Top level, and */
 3109:         could_continue &&                            /* Some could go on, and */
 3110:         forced_fail != workspace[1] &&               /* Not all forced fail & */
 3111:         (                                            /* either... */
 3112:         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
 3113:         ||                                           /* or... */
 3114:         ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
 3115:          match_count < 0)                            /* no matches */
 3116:         ) &&                                         /* And... */
 3117:         (
 3118:         partial_newline ||                           /* Either partial NL */
 3119:           (                                          /* or ... */
 3120:           ptr >= end_subject &&                /* End of subject and */
 3121:           ptr > md->start_used_ptr)            /* Inspected non-empty string */
 3122:           )
 3123:         )
 3124:       match_count = PCRE_ERROR_PARTIAL;
 3125:     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
 3126:       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
 3127:       rlevel*2-2, SP));
 3128:     break;        /* In effect, "return", but see the comment below */
 3129:     }
 3130: 
 3131:   /* One or more states are active for the next character. */
 3132: 
 3133:   ptr += clen;    /* Advance to next subject character */
 3134:   }               /* Loop to move along the subject string */
 3135: 
 3136: /* Control gets here from "break" a few lines above. We do it this way because
 3137: if we use "return" above, we have compiler trouble. Some compilers warn if
 3138: there's nothing here because they think the function doesn't return a value. On
 3139: the other hand, if we put a dummy statement here, some more clever compilers
 3140: complain that it can't be reached. Sigh. */
 3141: 
 3142: return match_count;
 3143: }
 3144: 
 3145: 
 3146: 
 3147: 
 3148: /*************************************************
 3149: *    Execute a Regular Expression - DFA engine   *
 3150: *************************************************/
 3151: 
 3152: /* This external function applies a compiled re to a subject string using a DFA
 3153: engine. This function calls the internal function multiple times if the pattern
 3154: is not anchored.
       It validates its arguments, decodes the newline and \R conventions, optionally
       checks the subject for UTF validity, and then runs a "bumpalong" loop that
       calls internal_dfa_exec() at successive start positions. Only one attempt is
       made when the match is anchored; PCRE_ANCHORED (at compile or match time) and
       PCRE_DFA_RESTART both count as anchored here.
 3155: 
 3156: Arguments:
 3157:   argument_re     points to the compiled expression
 3158:   extra_data      points to extra data or is NULL
 3159:   subject         points to the subject string
 3160:   length          length of subject string (may contain binary zeros)
 3161:   start_offset    where to start in the subject string
 3162:   options         option bits
 3163:   offsets         vector of match offsets
 3164:   offsetcount     size of same
 3165:   workspace       workspace vector
 3166:   wscount         size of same
                         (values below 20 are rejected with PCRE_ERROR_DFA_WSSIZE)
 3167: 
 3168: Returns:          > 0 => number of match offset pairs placed in offsets
 3169:                   = 0 => offsets overflowed; longest matches are present
 3170:                    -1 => failed to match
 3171:                  < -1 => some kind of unexpected problem

       On PCRE_ERROR_PARTIAL, if offsetcount >= 2, offsets[0] is set to the offset of
       md->start_used_ptr and offsets[1] to the offset of the end of the subject; if
       offsetcount > 2, offsets[2] is set to the offset at which the match attempt
       started. On a UTF validity failure, if offsetcount >= 2, offsets[0] receives
       the error offset and offsets[1] the detailed error code.
 3172: */
 3173: 
 3174: #if defined COMPILE_PCRE8
 3175: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 3176: pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
 3177:   const char *subject, int length, int start_offset, int options, int *offsets,
 3178:   int offsetcount, int *workspace, int wscount)
 3179: #elif defined COMPILE_PCRE16
 3180: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 3181: pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
 3182:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
 3183:   int offsetcount, int *workspace, int wscount)
 3184: #elif defined COMPILE_PCRE32
 3185: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 3186: pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
 3187:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
 3188:   int offsetcount, int *workspace, int wscount)
 3189: #endif
 3190: {
 3191: REAL_PCRE *re = (REAL_PCRE *)argument_re;
 3192: dfa_match_data match_block;
 3193: dfa_match_data *md = &match_block;
 3194: BOOL utf, anchored, startline, firstline;
 3195: const pcre_uchar *current_subject, *end_subject;
 3196: const pcre_study_data *study = NULL;
 3197: 
 3198: const pcre_uchar *req_char_ptr;
 3199: const pcre_uint8 *start_bits = NULL;
 3200: BOOL has_first_char = FALSE;
 3201: BOOL has_req_char = FALSE;
 3202: pcre_uchar first_char = 0;
 3203: pcre_uchar first_char2 = 0;
 3204: pcre_uchar req_char = 0;
 3205: pcre_uchar req_char2 = 0;
 3206: int newline;
 3207: 
 3208: /* Plausibility checks */
 3209: 
 3210: if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
 3211: if (re == NULL || subject == NULL || workspace == NULL ||
 3212:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 3213: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 3214: if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
 3215: if (length < 0) return PCRE_ERROR_BADLENGTH;
 3216: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 3217: 
 3218: /* Check that the first field in the block is the magic number. If it is not,
 3219: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
 3220: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
 3221: means that the pattern is likely compiled with different endianness. */
 3222: 
 3223: if (re->magic_number != MAGIC_NUMBER)
 3224:   return re->magic_number == REVERSED_MAGIC_NUMBER?
 3225:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
 3226: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 3227: 
 3228: /* If restarting after a partial match, do some sanity checks on the contents
 3229: of the workspace. workspace[0] must be 0 or 1, and workspace[1] must be at
       least 1 and no larger than (wscount - 2)/INTS_PER_STATEBLOCK; anything else
       gives PCRE_ERROR_DFA_BADRESTART. */
 3230: 
 3231: if ((options & PCRE_DFA_RESTART) != 0)
 3232:   {
 3233:   if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
 3234:     workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
 3235:       return PCRE_ERROR_DFA_BADRESTART;
 3236:   }
 3237: 
 3238: /* Set up study, callout, and table data */
 3239: 
 3240: md->tables = re->tables;
 3241: md->callout_data = NULL;
 3242: 
 3243: if (extra_data != NULL)
 3244:   {
 3245:   unsigned int flags = extra_data->flags;
 3246:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
 3247:     study = (const pcre_study_data *)extra_data->study_data;
         /* Either kind of match limit in the extra block is rejected here. */
 3248:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
 3249:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
 3250:     return PCRE_ERROR_DFA_UMLIMIT;
 3251:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
 3252:     md->callout_data = extra_data->callout_data;
 3253:   if ((flags & PCRE_EXTRA_TABLES) != 0)
 3254:     md->tables = extra_data->tables;
 3255:   }
 3256: 
 3257: /* Set some local values */
 3258: 
 3259: current_subject = (const pcre_uchar *)subject + start_offset;
 3260: end_subject = (const pcre_uchar *)subject + length;
       /* Seeded one before the first start position so that the first pass through
       the required-character search below (which tests p > req_char_ptr) is never
       skipped. */
 3261: req_char_ptr = current_subject - 1;
 3262: 
 3263: #ifdef SUPPORT_UTF
 3264: /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
 3265: utf = (re->options & PCRE_UTF8) != 0;
 3266: #else
 3267: utf = FALSE;
 3268: #endif
 3269: 
       /* A restart after a partial match is treated as anchored, as is an explicit
       PCRE_ANCHORED at either compile time or match time. */
 3270: anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
 3271:   (re->options & PCRE_ANCHORED) != 0;
 3272: 
 3273: /* The remaining fixed data for passing around. */
 3274: 
 3275: md->start_code = (const pcre_uchar *)argument_re +
 3276:     re->name_table_offset + re->name_count * re->name_entry_size;
 3277: md->start_subject = (const pcre_uchar *)subject;
 3278: md->end_subject = end_subject;
 3279: md->start_offset = start_offset;
 3280: md->moptions = options;
 3281: md->poptions = re->options;
 3282: 
 3283: /* If the BSR option is not set at match time, copy what was set
 3284: at compile time. If nothing was set at compile time either, PCRE_BSR_ANYCRLF
       is applied only when the library was built with BSR_ANYCRLF defined. */
 3285: 
 3286: if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
 3287:   {
 3288:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
 3289:     md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
 3290: #ifdef BSR_ANYCRLF
 3291:   else md->moptions |= PCRE_BSR_ANYCRLF;
 3292: #endif
 3293:   }
 3294: 
 3295: /* Handle different types of newline. The three bits give eight cases. If
 3296: nothing is set at run time, whatever was used at compile time applies. The
       result is encoded in "newline" as -2 for ANYCRLF, -1 for ANY, a single
       character value, or (first << 8) | second for a two-character sequence. */
 3297: 
 3298: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
 3299:          PCRE_NEWLINE_BITS)
 3300:   {
 3301:   case 0: newline = NEWLINE; break;   /* Compile-time default */
 3302:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
 3303:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
 3304:   case PCRE_NEWLINE_CR+
 3305:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
 3306:   case PCRE_NEWLINE_ANY: newline = -1; break;
 3307:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
 3308:   default: return PCRE_ERROR_BADNEWLINE;
 3309:   }
 3310: 
       /* Unpack the encoded newline value into the match block: the newline type
       and, for a fixed newline, its length and character(s). */
 3311: if (newline == -2)
 3312:   {
 3313:   md->nltype = NLTYPE_ANYCRLF;
 3314:   }
 3315: else if (newline < 0)
 3316:   {
 3317:   md->nltype = NLTYPE_ANY;
 3318:   }
 3319: else
 3320:   {
 3321:   md->nltype = NLTYPE_FIXED;
 3322:   if (newline > 255)
 3323:     {
 3324:     md->nllen = 2;
 3325:     md->nl[0] = (newline >> 8) & 255;
 3326:     md->nl[1] = newline & 255;
 3327:     }
 3328:   else
 3329:     {
 3330:     md->nllen = 1;
 3331:     md->nl[0] = newline;
 3332:     }
 3333:   }
 3334: 
 3335: /* Check the subject for UTF validity if required. If the check fails and the
 3336: offsets vector has room for at least two values, the error offset is passed
       back in offsets[0] and the detailed error code in offsets[1]. In the 8-bit
       and 16-bit libraries, a start offset strictly inside the subject that does
       not point at the first code unit of a character is also rejected. */
 3337: 
 3338: #ifdef SUPPORT_UTF
 3339: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
 3340:   {
 3341:   int erroroffset;
 3342:   int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
 3343:   if (errorcode != 0)
 3344:     {
 3345:     if (offsetcount >= 2)
 3346:       {
 3347:       offsets[0] = erroroffset;
 3348:       offsets[1] = errorcode;
 3349:       }
 3350: #if defined COMPILE_PCRE8
 3351:     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
 3352:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
 3353: #elif defined COMPILE_PCRE16
 3354:     return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
 3355:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
 3356: #elif defined COMPILE_PCRE32
 3357:     return PCRE_ERROR_BADUTF32;
 3358: #endif
 3359:     }
 3360: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
 3361:   if (start_offset > 0 && start_offset < length &&
 3362:         NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
 3363:     return PCRE_ERROR_BADUTF8_OFFSET;
 3364: #endif
 3365:   }
 3366: #endif
 3367: 
 3368: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
 3369: is a feature that makes it possible to save compiled regex and re-use them
 3370: in other programs later. */
 3371: 
 3372: if (md->tables == NULL) md->tables = PRIV(default_tables);
 3373: 
 3374: /* The "must be at the start of a line" flags are used in a loop when finding
 3375: where to start. */
 3376: 
 3377: startline = (re->flags & PCRE_STARTLINE) != 0;
 3378: firstline = (re->options & PCRE_FIRSTLINE) != 0;
 3379: 
 3380: /* Set up the first character to match, if available. The first_char value is
 3381: never set for an anchored regular expression, but the anchoring may be forced
 3382: at run time, so we have to test for anchoring. The first char may be unset for
 3383: an unanchored pattern, of course. If there's no first char and the pattern was
 3384: studied, there may be a bitmap of possible first characters. */
 3385: 
 3386: if (!anchored)
 3387:   {
 3388:   if ((re->flags & PCRE_FIRSTSET) != 0)
 3389:     {
 3390:     has_first_char = TRUE;
 3391:     first_char = first_char2 = (pcre_uchar)(re->first_char);
 3392:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
 3393:       {
 3394:       first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
 3395: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
 3396:       if (utf && first_char > 127)
 3397:         first_char2 = UCD_OTHERCASE(first_char);
 3398: #endif
 3399:       }
 3400:     }
 3401:   else
 3402:     {
 3403:     if (!startline && study != NULL &&
 3404:          (study->flags & PCRE_STUDY_MAPPED) != 0)
 3405:       start_bits = study->start_bits;
 3406:     }
 3407:   }
 3408: 
 3409: /* For anchored or unanchored matches, there may be a "last known required
 3410: character" set. */
 3411: 
 3412: if ((re->flags & PCRE_REQCHSET) != 0)
 3413:   {
 3414:   has_req_char = TRUE;
 3415:   req_char = req_char2 = (pcre_uchar)(re->req_char);
 3416:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
 3417:     {
 3418:     req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
 3419: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
 3420:     if (utf && req_char > 127)
 3421:       req_char2 = UCD_OTHERCASE(req_char);
 3422: #endif
 3423:     }
 3424:   }
 3425: 
 3426: /* Call the main matching function, looping for a non-anchored regex after a
 3427: failed match. If not restarting, perform certain optimizations at the start of
 3428: a match. */
 3429: 
 3430: for (;;)
 3431:   {
 3432:   int rc;
 3433: 
 3434:   if ((options & PCRE_DFA_RESTART) == 0)
 3435:     {
 3436:     const pcre_uchar *save_end_subject = end_subject;
 3437: 
 3438:     /* If firstline is TRUE, the start of the match is constrained to the first
 3439:     line of a multiline string. Implement this by temporarily adjusting
 3440:     end_subject so that we stop scanning at a newline. If the match fails at
 3441:     the newline, later code breaks this loop. */
 3442: 
 3443:     if (firstline)
 3444:       {
 3445:       PCRE_PUCHAR t = current_subject;
 3446: #ifdef SUPPORT_UTF
 3447:       if (utf)
 3448:         {
 3449:         while (t < md->end_subject && !IS_NEWLINE(t))
 3450:           {
 3451:           t++;
 3452:           ACROSSCHAR(t < end_subject, *t, t++);
 3453:           }
 3454:         }
 3455:       else
 3456: #endif
 3457:       while (t < md->end_subject && !IS_NEWLINE(t)) t++;
 3458:       end_subject = t;
 3459:       }
 3460: 
 3461:     /* There are some optimizations that avoid running the match if a known
 3462:     starting point is not found. However, there is an option that disables
 3463:     these, for testing and for ensuring that all callouts do actually occur.
 3464:     The option can be set in the regex by (*NO_START_OPT) or passed in
 3465:     match-time options. */
 3466: 
 3467:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
 3468:       {
 3469:       /* Advance to a known first char. */
 3470: 
 3471:       if (has_first_char)
 3472:         {
 3473:         if (first_char != first_char2)
 3474:           {
 3475:           pcre_uchar csc;
 3476:           while (current_subject < end_subject &&
 3477:                  (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
 3478:             current_subject++;
 3479:           }
 3480:         else
 3481:           while (current_subject < end_subject &&
 3482:                  RAWUCHARTEST(current_subject) != first_char)
 3483:             current_subject++;
 3484:         }
 3485: 
 3486:       /* Or to just after a linebreak for a multiline match if possible */
 3487: 
 3488:       else if (startline)
 3489:         {
               /* Only once we have moved past the initial start position, so that
               the character before current_subject is one this call has passed. */
 3490:         if (current_subject > md->start_subject + start_offset)
 3491:           {
 3492: #ifdef SUPPORT_UTF
 3493:           if (utf)
 3494:             {
 3495:             while (current_subject < end_subject &&
 3496:                    !WAS_NEWLINE(current_subject))
 3497:               {
 3498:               current_subject++;
 3499:               ACROSSCHAR(current_subject < end_subject, *current_subject,
 3500:                 current_subject++);
 3501:               }
 3502:             }
 3503:           else
 3504: #endif
 3505:           while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
 3506:             current_subject++;
 3507: 
 3508:           /* If we have just passed a CR and the newline option is ANY or
 3509:           ANYCRLF, and we are now at a LF, advance the match position by one
 3510:           more character. */
 3511: 
 3512:           if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
 3513:                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
 3514:                current_subject < end_subject &&
 3515:                RAWUCHARTEST(current_subject) == CHAR_NL)
 3516:             current_subject++;
 3517:           }
 3518:         }
 3519: 
 3520:       /* Or to a non-unique first char after study */
 3521: 
 3522:       else if (start_bits != NULL)
 3523:         {
 3524:         while (current_subject < end_subject)
 3525:           {
 3526:           register pcre_uint32 c = RAWUCHARTEST(current_subject);
 3527: #ifndef COMPILE_PCRE8
                 /* Values above 255 are all looked up under bit 255 of the map. */
 3528:           if (c > 255) c = 255;
 3529: #endif
 3530:           if ((start_bits[c/8] & (1 << (c&7))) == 0)
 3531:             {
 3532:             current_subject++;
 3533: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
 3534:             /* In non 8-bit mode, the iteration will stop for
 3535:             characters > 255 at the beginning or not stop at all. */
 3536:             if (utf)
 3537:               ACROSSCHAR(current_subject < end_subject, *current_subject,
 3538:                 current_subject++);
 3539: #endif
 3540:             }
 3541:           else break;
 3542:           }
 3543:         }
 3544:       }
 3545: 
 3546:     /* Restore fudged end_subject */
 3547: 
 3548:     end_subject = save_end_subject;
 3549: 
 3550:     /* The following two optimizations are disabled for partial matching or if
 3551:     disabling is explicitly requested (and of course, by the test above, this
 3552:     code is not obeyed when restarting after a partial match). */
 3553: 
 3554:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
 3555:         (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
 3556:       {
 3557:       /* If the pattern was studied, a minimum subject length may be set. This
 3558:       is a lower bound; no actual string of that length may actually match the
 3559:       pattern. Although the value is, strictly, in characters, we treat it as
 3560:       bytes to avoid spending too much time in this optimization. */
 3561: 
 3562:       if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
 3563:           (pcre_uint32)(end_subject - current_subject) < study->minlength)
 3564:         return PCRE_ERROR_NOMATCH;
 3565: 
 3566:       /* If req_char is set, we know that that character must appear in the
 3567:       subject for the match to succeed. If the first character is set, req_char
 3568:       must be later in the subject; otherwise the test starts at the match
 3569:       point. This optimization can save a huge amount of work in patterns with
 3570:       nested unlimited repeats that aren't going to match. Writing separate
 3571:       code for cased/caseless versions makes it go faster, as does using an
 3572:       autoincrement and backing off on a match.
 3573: 
 3574:       HOWEVER: when the subject string is very, very long, searching to its end
 3575:       can take a long time, and give bad performance on quite ordinary
 3576:       patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
 3577:       string... so we don't do this when the string is sufficiently long. */
 3578: 
 3579:       if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
 3580:         {
 3581:         register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
 3582: 
 3583:         /* We don't need to repeat the search if we haven't yet reached the
 3584:         place we found it at last time. */
 3585: 
 3586:         if (p > req_char_ptr)
 3587:           {
 3588:           if (req_char != req_char2)
 3589:             {
 3590:             while (p < end_subject)
 3591:               {
 3592:               register pcre_uint32 pp = RAWUCHARINCTEST(p);
 3593:               if (pp == req_char || pp == req_char2) { p--; break; }
 3594:               }
 3595:             }
 3596:           else
 3597:             {
 3598:             while (p < end_subject)
 3599:               {
 3600:               if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
 3601:               }
 3602:             }
 3603: 
 3604:           /* If we can't find the required character, break the matching loop,
 3605:           which will cause a return of PCRE_ERROR_NOMATCH. */
 3606: 
 3607:           if (p >= end_subject) break;
 3608: 
 3609:           /* If we have found the required character, save the point where we
 3610:           found it, so that we don't search again next time round the loop if
 3611:           the start hasn't passed this character yet. */
 3612: 
 3613:           req_char_ptr = p;
 3614:           }
 3615:         }
 3616:       }
 3617:     }   /* End of optimizations that are done when not restarting */
 3618: 
 3619:   /* OK, now we can do the business */
 3620: 
 3621:   md->start_used_ptr = current_subject;
 3622:   md->recursive = NULL;
 3623: 
 3624:   rc = internal_dfa_exec(
 3625:     md,                                /* fixed match data */
 3626:     md->start_code,                    /* this subexpression's code */
 3627:     current_subject,                   /* where we currently are */
 3628:     start_offset,                      /* start offset in subject */
 3629:     offsets,                           /* offset vector */
 3630:     offsetcount,                       /* size of same */
 3631:     workspace,                         /* workspace vector */
 3632:     wscount,                           /* size of same */
 3633:     0);                                /* function recurse level */
 3634: 
 3635:   /* Anything other than "no match" means we are done, always; otherwise, carry
 3636:   on only if not anchored. For a partial match, the offsets vector (if large
         enough) is filled in with the start of the inspected text, the end of the
         subject, and the position at which this match attempt started. */
 3637: 
 3638:   if (rc != PCRE_ERROR_NOMATCH || anchored)
 3639:     {
 3640:     if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
 3641:       {
 3642:       offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
 3643:       offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
 3644:       if (offsetcount > 2)
 3645:         offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
 3646:       }
 3647:     return rc;
 3648:     }
 3649: 
 3650:   /* Advance to the next subject character unless we are at the end of a line
 3651:   and firstline is set. */
 3652: 
 3653:   if (firstline && IS_NEWLINE(current_subject)) break;
 3654:   current_subject++;
 3655: #ifdef SUPPORT_UTF
 3656:   if (utf)
 3657:     {
 3658:     ACROSSCHAR(current_subject < end_subject, *current_subject,
 3659:       current_subject++);
 3660:     }
 3661: #endif
 3662:   if (current_subject > end_subject) break;
 3663: 
 3664:   /* If we have just passed a CR and we are now at a LF, and the pattern does
 3665:   not contain any explicit matches for \r or \n, and the newline option is CRLF
 3666:   or ANY or ANYCRLF, advance the match position by one more character. */
 3667: 
 3668:   if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
 3669:       current_subject < end_subject &&
 3670:       RAWUCHARTEST(current_subject) == CHAR_NL &&
 3671:       (re->flags & PCRE_HASCRORLF) == 0 &&
 3672:         (md->nltype == NLTYPE_ANY ||
 3673:          md->nltype == NLTYPE_ANYCRLF ||
 3674:          md->nllen == 2))
 3675:     current_subject++;
 3676: 
 3677:   }   /* "Bumpalong" loop */
 3678: 
       /* Reached only by a "break" out of the bumpalong loop: every permitted start
       position has been tried (or ruled out) without a match. */
 3679: return PCRE_ERROR_NOMATCH;
 3680: }
 3681: 
 3682: /* End of pcre_dfa_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>