Annotation of embedaddon/pcre/pcre_dfa_exec.c, revision 1.1.1.5

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language (but see
                      7: below for why this module is different).
                      8: 
                      9:                        Written by Philip Hazel
1.1.1.4   misho      10:            Copyright (c) 1997-2013 University of Cambridge
1.1       misho      11: 
                     12: -----------------------------------------------------------------------------
                     13: Redistribution and use in source and binary forms, with or without
                     14: modification, are permitted provided that the following conditions are met:
                     15: 
                     16:     * Redistributions of source code must retain the above copyright notice,
                     17:       this list of conditions and the following disclaimer.
                     18: 
                     19:     * Redistributions in binary form must reproduce the above copyright
                     20:       notice, this list of conditions and the following disclaimer in the
                     21:       documentation and/or other materials provided with the distribution.
                     22: 
                     23:     * Neither the name of the University of Cambridge nor the names of its
                     24:       contributors may be used to endorse or promote products derived from
                     25:       this software without specific prior written permission.
                     26: 
                     27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37: POSSIBILITY OF SUCH DAMAGE.
                     38: -----------------------------------------------------------------------------
                     39: */
                     40: 
                     41: /* This module contains the external function pcre_dfa_exec(), which is an
                     42: alternative matching function that uses a sort of DFA algorithm (not a true
1.1.1.3   misho      43: FSM). This is NOT Perl-compatible, but it has advantages in certain
1.1       misho      44: applications. */
                     45: 
                     46: 
                     47: /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
                     48: the performance of his patterns greatly. I could not use it as it stood, as it
                     49: was not thread safe, and made assumptions about pattern sizes. Also, it caused
                     50: test 7 to loop, and test 9 to crash with a segfault.
                     51: 
                     52: The issue is the check for duplicate states, which is done by a simple linear
                     53: search up the state list. (Grep for "duplicate" below to find the code.) For
                     54: many patterns, there will never be many states active at one time, so a simple
                     55: linear search is fine. In patterns that have many active states, it might be a
                     56: bottleneck. The suggested code used an indexing scheme to remember which states
                     57: had previously been used for each character, and avoided the linear search when
                     58: it knew there was no chance of a duplicate. This was implemented when adding
                     59: states to the state lists.
                     60: 
                     61: I wrote some thread-safe, not-limited code to try something similar at the time
                     62: of checking for duplicates (instead of when adding states), using index vectors
                     63: on the stack. It did give a 13% improvement with one specially constructed
                     64: pattern for certain subject strings, but on other strings and on many of the
                     65: simpler patterns in the test suite it did worse. The major problem, I think,
                     66: was the extra time to initialize the index. This had to be done for each call
                     67: of internal_dfa_exec(). (The supplied patch used a static vector, initialized
                     68: only once - I suspect this was the cause of the problems with the tests.)
                     69: 
                     70: Overall, I concluded that the gains in some cases did not outweigh the losses
                     71: in others, so I abandoned this code. */
                     72: 
                     73: 
                     74: 
                     75: #ifdef HAVE_CONFIG_H
                     76: #include "config.h"
                     77: #endif
                     78: 
                     79: #define NLBLOCK md             /* Block containing newline information */
                     80: #define PSSTART start_subject  /* Field containing processed string start */
                     81: #define PSEND   end_subject    /* Field containing processed string end */
                     82: 
                     83: #include "pcre_internal.h"
                     84: 
                     85: 
                     86: /* For use to indent debugging output (DPRINTF prints a prefix of it via %.*s) */
                     87: 
                     88: #define SP "                   "
                     89: 
                     90: 
                     91: /*************************************************
                     92: *      Code parameters and static tables         *
                     93: *************************************************/
                     94: 
                     95: /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
                     96: into others, under special conditions. A gap of 20 between the blocks should be
                     97: enough. The resulting opcodes don't have to be less than 256 because they are
                     98: never stored, so we push them well clear of the normal opcodes. */
                     99: 
                    100: #define OP_PROP_EXTRA       300   /* NOTE(review): presumably for \P, \p - confirm */
                    101: #define OP_EXTUNI_EXTRA     320   /* NOTE(review): presumably for \X - confirm */
                    102: #define OP_ANYNL_EXTRA      340   /* NOTE(review): presumably for \R - confirm */
                    103: #define OP_HSPACE_EXTRA     360   /* NOTE(review): presumably for \H, \h - confirm */
                    104: #define OP_VSPACE_EXTRA     380   /* NOTE(review): presumably for \V, \v - confirm */
                    105: 
                    106: 
                    107: /* This table identifies those opcodes that are followed immediately by a
                    108: character that is to be tested in some way. This makes it possible to
                    109: centralize the loading of these characters. In the case of Type * etc, the
                    110: "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
                    111: small value. Non-zero values in the table are the offsets from the opcode where
                    112: the character is to be found; zero means no such character follows. ***NOTE***
                    113: If the start of this table changes, so must the three tables that follow it. */
                    114: 
1.1.1.2   misho     115: static const pcre_uint8 coptable[] = {
1.1       misho     116:   0,                             /* End                                    */
                    117:   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
                    118:   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
                    119:   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
                    120:   0, 0,                          /* \P, \p                                 */
                    121:   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
                    122:   0,                             /* \X                                     */
1.1.1.5 ! misho     123:   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
1.1       misho     124:   1,                             /* Char                                   */
                    125:   1,                             /* Chari                                  */
                    126:   1,                             /* not                                    */
                    127:   1,                             /* noti                                   */
                    128:   /* Positive single-char repeats                                          */
                    129:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
1.1.1.2   misho     130:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
                    131:   1+IMM2_SIZE,                   /* exact                                  */
                    132:   1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
1.1       misho     133:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
1.1.1.2   misho     134:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
                    135:   1+IMM2_SIZE,                   /* exact I                                */
                    136:   1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
1.1       misho     137:   /* Negative single-char repeats - only for chars < 256                   */
                    138:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
1.1.1.2   misho     139:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
                    140:   1+IMM2_SIZE,                   /* NOT exact                              */
                    141:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
1.1       misho     142:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
1.1.1.2   misho     143:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
                    144:   1+IMM2_SIZE,                   /* NOT exact I                            */
                    145:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
1.1       misho     146:   /* Positive type repeats                                                 */
                    147:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
1.1.1.2   misho     148:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
                    149:   1+IMM2_SIZE,                   /* Type exact                             */
                    150:   1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
1.1       misho     151:   /* Character class & ref repeats                                         */
                    152:   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
                    153:   0, 0,                          /* CRRANGE, CRMINRANGE                    */
1.1.1.5 ! misho     154:   0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
1.1       misho     155:   0,                             /* CLASS                                  */
                    156:   0,                             /* NCLASS                                 */
                    157:   0,                             /* XCLASS - variable length               */
                    158:   0,                             /* REF                                    */
                    159:   0,                             /* REFI                                   */
1.1.1.5 ! misho     160:   0,                             /* DNREF                                  */
        !           161:   0,                             /* DNREFI                                 */
1.1       misho     162:   0,                             /* RECURSE                                */
                    163:   0,                             /* CALLOUT                                */
                    164:   0,                             /* Alt                                    */
                    165:   0,                             /* Ket                                    */
                    166:   0,                             /* KetRmax                                */
                    167:   0,                             /* KetRmin                                */
                    168:   0,                             /* KetRpos                                */
                    169:   0,                             /* Reverse                                */
                    170:   0,                             /* Assert                                 */
                    171:   0,                             /* Assert not                             */
                    172:   0,                             /* Assert behind                          */
                    173:   0,                             /* Assert behind not                      */
                    174:   0, 0,                          /* ONCE, ONCE_NC                          */
                    175:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    176:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
1.1.1.5 ! misho     177:   0, 0,                          /* CREF, DNCREF                           */
        !           178:   0, 0,                          /* RREF, DNRREF                           */
1.1       misho     179:   0,                             /* DEF                                    */
                    180:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    181:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    182:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    183:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    184:   0, 0                           /* CLOSE, SKIPZERO                        */
                    185: };
                    186: 
                    187: /* This table identifies those opcodes that inspect a character (1 = inspects,
                    188: 0 = does not). It is used to remember the fact that a character could have been
                    189: inspected when the end of the subject is reached. ***NOTE*** If the start of
                    190: this table is modified, the two tables that follow must also be modified. */
                    191: 
1.1.1.2   misho     192: static const pcre_uint8 poptable[] = {
1.1       misho     193:   0,                             /* End                                    */
                    194:   0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
                    195:   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
                    196:   1, 1, 1,                       /* Any, AllAny, Anybyte                   */
                    197:   1, 1,                          /* \P, \p                                 */
                    198:   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
                    199:   1,                             /* \X                                     */
1.1.1.5 ! misho     200:   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
1.1       misho     201:   1,                             /* Char                                   */
                    202:   1,                             /* Chari                                  */
                    203:   1,                             /* not                                    */
                    204:   1,                             /* noti                                   */
                    205:   /* Positive single-char repeats                                          */
                    206:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    207:   1, 1, 1,                       /* upto, minupto, exact                   */
                    208:   1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
                    209:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
                    210:   1, 1, 1,                       /* upto I, minupto I, exact I             */
                    211:   1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
                    212:   /* Negative single-char repeats - only for chars < 256                   */
                    213:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
                    214:   1, 1, 1,                       /* NOT upto, minupto, exact               */
                    215:   1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
                    216:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
                    217:   1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
                    218:   1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
                    219:   /* Positive type repeats                                                 */
                    220:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
                    221:   1, 1, 1,                       /* Type upto, minupto, exact              */
                    222:   1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
                    223:   /* Character class & ref repeats                                         */
                    224:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    225:   1, 1,                          /* CRRANGE, CRMINRANGE                    */
1.1.1.5 ! misho     226:   1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
1.1       misho     227:   1,                             /* CLASS                                  */
                    228:   1,                             /* NCLASS                                 */
                    229:   1,                             /* XCLASS - variable length               */
                    230:   0,                             /* REF                                    */
                    231:   0,                             /* REFI                                   */
1.1.1.5 ! misho     232:   0,                             /* DNREF                                  */
        !           233:   0,                             /* DNREFI                                 */
1.1       misho     234:   0,                             /* RECURSE                                */
                    235:   0,                             /* CALLOUT                                */
                    236:   0,                             /* Alt                                    */
                    237:   0,                             /* Ket                                    */
                    238:   0,                             /* KetRmax                                */
                    239:   0,                             /* KetRmin                                */
                    240:   0,                             /* KetRpos                                */
                    241:   0,                             /* Reverse                                */
                    242:   0,                             /* Assert                                 */
                    243:   0,                             /* Assert not                             */
                    244:   0,                             /* Assert behind                          */
                    245:   0,                             /* Assert behind not                      */
                    246:   0, 0,                          /* ONCE, ONCE_NC                          */
                    247:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    248:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
1.1.1.5 ! misho     249:   0, 0,                          /* CREF, DNCREF                           */
        !           250:   0, 0,                          /* RREF, DNRREF                           */
1.1       misho     251:   0,                             /* DEF                                    */
                    252:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    253:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    254:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    255:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    256:   0, 0                           /* CLOSE, SKIPZERO                        */
                    257: };
                    258: 
                    259: /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
                    260: and \w. Their entries follow the opcode order used at the start of coptable. */
                    261: 
1.1.1.2   misho     262: static const pcre_uint8 toptable1[] = {
1.1       misho     263:   0, 0, 0, 0, 0, 0,               /* End, \A, \G, \K, \B, \b */
                    264:   ctype_digit, ctype_digit,       /* \D, \d */
                    265:   ctype_space, ctype_space,       /* \S, \s */
                    266:   ctype_word,  ctype_word,        /* \W, \w */
                    267:   0, 0                            /* OP_ANY, OP_ALLANY */
                    268: };
                    269: 
1.1.1.2   misho     270: static const pcre_uint8 toptable2[] = {   /* same index order as toptable1 */
1.1       misho     271:   0, 0, 0, 0, 0, 0,               /* End, \A, \G, \K, \B, \b */
                    272:   ctype_digit, 0,                 /* \D, \d: ctype bit only for the negated type */
                    273:   ctype_space, 0,                 /* \S, \s: likewise */
                    274:   ctype_word,  0,                 /* \W, \w: likewise */
                    275:   1, 1                            /* OP_ANY, OP_ALLANY */
                    276: };
                    277: 
                    278: 
                    279: /* Structure for holding data about a particular state, which is in effect the
                    280: current data for an active path through the match tree. It must consist
                    281: entirely of ints because the working vector we are passed, and which we put
                    282: these structures in, is a vector of ints. */
                    283: 
                    284: typedef struct stateblock {
                    285:   int offset;                     /* Offset to opcode */
                    286:   int count;                      /* Count for repeats */
                    287:   int data;                       /* Extra data; only some states use it */
                    288: } stateblock;
                    289: 
1.1.1.3   misho     290: #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))   /* Size of one stateblock, in ints */
1.1       misho     291: 
                    292: 
                    293: #ifdef PCRE_DEBUG
                    294: /*************************************************
                    295: *             Print character string             *
                    296: *************************************************/
                    297: 
                    298: /* Character string printing function for debugging.
                    299: 
                    300: Arguments:
                    301:   p            points to string
                    302:   length       number of pcre_uchar units to print
                    303:   f            where to print
                    304: 
                    305: Returns:       nothing
                    306: */
                    307: 
                    308: static void
1.1.1.2   misho     309: pchars(const pcre_uchar *p, int length, FILE *f)
1.1       misho     310: {
1.1.1.4   misho     311: pcre_uint32 c;                  /* unit currently being printed */
1.1       misho     312: while (length-- > 0)            /* one element of p is consumed per count */
                    313:   {
                    314:   if (isprint(c = *(p++)))      /* NOTE(review): is c always within isprint()'s range? */
                    315:     fprintf(f, "%c", c);        /* printable: written as is */
                    316:   else
1.1.1.4   misho     317:     fprintf(f, "\\x{%02x}", c); /* otherwise written as a hex escape, \x{..} */
1.1       misho     318:   }
                    319: }
                    320: #endif
                    321: 
                    322: 
                    323: 
                    324: /*************************************************
                    325: *    Execute a Regular Expression - DFA engine   *
                    326: *************************************************/
                    327: 
                    328: /* This internal function applies a compiled pattern to a subject string,
                    329: starting at a given point, using a DFA engine. This function is called from the
                    330: external one, possibly multiple times if the pattern is not anchored. The
                    331: function calls itself recursively for some kinds of subpattern.
                    332: 
                    333: Arguments:
                    334:   md                the match_data block with fixed information
                    335:   this_start_code   the opening bracket of this subexpression's code
                    336:   current_subject   where we currently are in the subject string
                    337:   start_offset      start offset in the subject string
                    338:   offsets           vector to contain the matching string offsets
                    339:   offsetcount       size of same
                    340:   workspace         vector of workspace
                    341:   wscount           size of same
                    342:   rlevel            function call recursion level
                    343: 
                    344: Returns:            > 0 => number of match offset pairs placed in offsets
                    345:                     = 0 => offsets overflowed; longest matches are present
                    346:                      -1 => failed to match
                    347:                    < -1 => some kind of unexpected problem
                    348: 
                    349: The following macros are used for adding states to the two state vectors (one
                    350: for the current character, one for the following character). */
                    351: 
                    352: #define ADD_ACTIVE(x,y) /* store (offset x, count y) at next_active_state */ \
                    353:   if (active_count++ < wscount) /* counter is bumped even when there is no room */ \
                    354:     { \
                    355:     next_active_state->offset = (x); \
                    356:     next_active_state->count  = (y); /* ->data is not written by this form */ \
                    357:     next_active_state++; \
                    358:     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    359:     } \
                    360:   else return PCRE_ERROR_DFA_WSSIZE   /* no room; bare if/else, so use only as a full statement */
                    361: 
                    362: #define ADD_ACTIVE_DATA(x,y,z) /* as ADD_ACTIVE, and also stores z in ->data */ \
                    363:   if (active_count++ < wscount) /* counter is bumped even when there is no room */ \
                    364:     { \
                    365:     next_active_state->offset = (x); \
                    366:     next_active_state->count  = (y); \
                    367:     next_active_state->data   = (z); \
                    368:     next_active_state++; \
                    369:     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
                    370:     } \
                    371:   else return PCRE_ERROR_DFA_WSSIZE   /* no room; bare if/else, so use only as a full statement */
                    372: 
                    373: #define ADD_NEW(x,y) /* store (offset x, count y) at next_new_state */ \
                    374:   if (new_count++ < wscount) /* counter is bumped even when there is no room */ \
                    375:     { \
                    376:     next_new_state->offset = (x); \
                    377:     next_new_state->count  = (y); /* ->data is not written by this form */ \
                    378:     next_new_state++; \
                    379:     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    380:     } \
                    381:   else return PCRE_ERROR_DFA_WSSIZE   /* no room; bare if/else, so use only as a full statement */
                    382: 
                    383: #define ADD_NEW_DATA(x,y,z) /* as ADD_NEW, and also stores z in ->data */ \
                    384:   if (new_count++ < wscount) /* counter is bumped even when there is no room */ \
                    385:     { \
                    386:     next_new_state->offset = (x); \
                    387:     next_new_state->count  = (y); \
                    388:     next_new_state->data   = (z); \
                    389:     next_new_state++; \
1.1.1.3   misho     390:     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
                    391:       (x), (y), (z), __LINE__)); \
1.1       misho     392:     } \
                    393:   else return PCRE_ERROR_DFA_WSSIZE   /* no room; bare if/else, so use only as a full statement */
                    394: 
                    395: /* And now, here is the code */
                    396: 
                    397: static int
                    398: internal_dfa_exec(
                    399:   dfa_match_data *md,
1.1.1.2   misho     400:   const pcre_uchar *this_start_code,
                    401:   const pcre_uchar *current_subject,
1.1       misho     402:   int start_offset,
                    403:   int *offsets,
                    404:   int offsetcount,
                    405:   int *workspace,
                    406:   int wscount,
                    407:   int  rlevel)
                    408: {
                    409: stateblock *active_states, *new_states, *temp_states;
                    410: stateblock *next_active_state, *next_new_state;
                    411: 
1.1.1.2   misho     412: const pcre_uint8 *ctypes, *lcc, *fcc;
                    413: const pcre_uchar *ptr;
                    414: const pcre_uchar *end_code, *first_op;
1.1       misho     415: 
                    416: dfa_recursion_info new_recursive;
                    417: 
                    418: int active_count, new_count, match_count;
                    419: 
                    420: /* Some fields in the md block are frequently referenced, so we load them into
                    421: independent variables in the hope that this will perform better. */
                    422: 
1.1.1.2   misho     423: const pcre_uchar *start_subject = md->start_subject;
                    424: const pcre_uchar *end_subject = md->end_subject;
                    425: const pcre_uchar *start_code = md->start_code;
1.1       misho     426: 
1.1.1.2   misho     427: #ifdef SUPPORT_UTF
                    428: BOOL utf = (md->poptions & PCRE_UTF8) != 0;
1.1       misho     429: #else
1.1.1.2   misho     430: BOOL utf = FALSE;
1.1       misho     431: #endif
                    432: 
1.1.1.3   misho     433: BOOL reset_could_continue = FALSE;
                    434: 
1.1       misho     435: rlevel++;
                    436: offsetcount &= (-2);
                    437: 
                    438: wscount -= 2;
                    439: wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
                    440:           (2 * INTS_PER_STATEBLOCK);
                    441: 
                    442: DPRINTF(("\n%.*s---------------------\n"
                    443:   "%.*sCall to internal_dfa_exec f=%d\n",
                    444:   rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
                    445: 
                    446: ctypes = md->tables + ctypes_offset;
                    447: lcc = md->tables + lcc_offset;
                    448: fcc = md->tables + fcc_offset;
                    449: 
                    450: match_count = PCRE_ERROR_NOMATCH;   /* A negative number */
                    451: 
                    452: active_states = (stateblock *)(workspace + 2);
                    453: next_new_state = new_states = active_states + wscount;
                    454: new_count = 0;
                    455: 
                    456: first_op = this_start_code + 1 + LINK_SIZE +
                    457:   ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
1.1.1.2   misho     458:     *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
                    459:     ? IMM2_SIZE:0);
1.1       misho     460: 
                    461: /* The first thing in any (sub) pattern is a bracket of some sort. Push all
                    462: the alternative states onto the list, and find out where the end is. This
                    463: makes it possible to use this function recursively, when we want to stop at a
                    464: matching internal ket rather than at the end.
                    465: 
                    466: If the first opcode in the first alternative is OP_REVERSE, we are dealing with
                    467: a backward assertion. In that case, we have to find out the maximum amount to
                    468: move back, and set up each alternative appropriately. */
                    469: 
                    470: if (*first_op == OP_REVERSE)
                    471:   {
                    472:   int max_back = 0;
                    473:   int gone_back;
                    474: 
                    475:   end_code = this_start_code;
                    476:   do
                    477:     {
                    478:     int back = GET(end_code, 2+LINK_SIZE);
                    479:     if (back > max_back) max_back = back;
                    480:     end_code += GET(end_code, 1);
                    481:     }
                    482:   while (*end_code == OP_ALT);
                    483: 
                    484:   /* If we can't go back the amount required for the longest lookbehind
                    485:   pattern, go back as far as we can; some alternatives may still be viable. */
                    486: 
1.1.1.2   misho     487: #ifdef SUPPORT_UTF
1.1       misho     488:   /* In character mode we have to step back character by character */
                    489: 
1.1.1.2   misho     490:   if (utf)
1.1       misho     491:     {
                    492:     for (gone_back = 0; gone_back < max_back; gone_back++)
                    493:       {
                    494:       if (current_subject <= start_subject) break;
                    495:       current_subject--;
1.1.1.2   misho     496:       ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
1.1       misho     497:       }
                    498:     }
                    499:   else
                    500: #endif
                    501: 
                    502:   /* In byte-mode we can do this quickly. */
                    503: 
                    504:     {
                    505:     gone_back = (current_subject - max_back < start_subject)?
                    506:       (int)(current_subject - start_subject) : max_back;
                    507:     current_subject -= gone_back;
                    508:     }
                    509: 
                    510:   /* Save the earliest consulted character */
                    511: 
                    512:   if (current_subject < md->start_used_ptr)
                    513:     md->start_used_ptr = current_subject;
                    514: 
                    515:   /* Now we can process the individual branches. */
                    516: 
                    517:   end_code = this_start_code;
                    518:   do
                    519:     {
                    520:     int back = GET(end_code, 2+LINK_SIZE);
                    521:     if (back <= gone_back)
                    522:       {
                    523:       int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
                    524:       ADD_NEW_DATA(-bstate, 0, gone_back - back);
                    525:       }
                    526:     end_code += GET(end_code, 1);
                    527:     }
                    528:   while (*end_code == OP_ALT);
                    529:  }
                    530: 
                    531: /* This is the code for a "normal" subpattern (not a backward assertion). The
                    532: start of a whole pattern is always one of these. If we are at the top level,
                    533: we may be asked to restart matching from the same point that we reached for a
                    534: previous partial match. We still have to scan through the top-level branches to
                    535: find the end state. */
                    536: 
                    537: else
                    538:   {
                    539:   end_code = this_start_code;
                    540: 
                    541:   /* Restarting */
                    542: 
                    543:   if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
                    544:     {
                    545:     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
                    546:     new_count = workspace[1];
                    547:     if (!workspace[0])
                    548:       memcpy(new_states, active_states, new_count * sizeof(stateblock));
                    549:     }
                    550: 
                    551:   /* Not restarting */
                    552: 
                    553:   else
                    554:     {
                    555:     int length = 1 + LINK_SIZE +
                    556:       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
1.1.1.2   misho     557:         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
                    558:         ? IMM2_SIZE:0);
1.1       misho     559:     do
                    560:       {
                    561:       ADD_NEW((int)(end_code - start_code + length), 0);
                    562:       end_code += GET(end_code, 1);
                    563:       length = 1 + LINK_SIZE;
                    564:       }
                    565:     while (*end_code == OP_ALT);
                    566:     }
                    567:   }
                    568: 
                    569: workspace[0] = 0;    /* Bit indicating which vector is current */
                    570: 
1.1.1.2   misho     571: DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
1.1       misho     572: 
                    573: /* Loop for scanning the subject */
                    574: 
                    575: ptr = current_subject;
                    576: for (;;)
                    577:   {
                    578:   int i, j;
                    579:   int clen, dlen;
1.1.1.4   misho     580:   pcre_uint32 c, d;
1.1       misho     581:   int forced_fail = 0;
1.1.1.3   misho     582:   BOOL partial_newline = FALSE;
                    583:   BOOL could_continue = reset_could_continue;
                    584:   reset_could_continue = FALSE;
1.1       misho     585: 
                    586:   /* Make the new state list into the active state list and empty the
                    587:   new state list. */
                    588: 
                    589:   temp_states = active_states;
                    590:   active_states = new_states;
                    591:   new_states = temp_states;
                    592:   active_count = new_count;
                    593:   new_count = 0;
                    594: 
                    595:   workspace[0] ^= 1;              /* Remember for the restarting feature */
                    596:   workspace[1] = active_count;
                    597: 
                    598: #ifdef PCRE_DEBUG
                    599:   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
1.1.1.2   misho     600:   pchars(ptr, STRLEN_UC(ptr), stdout);
1.1       misho     601:   printf("\"\n");
                    602: 
                    603:   printf("%.*sActive states: ", rlevel*2-2, SP);
                    604:   for (i = 0; i < active_count; i++)
                    605:     printf("%d/%d ", active_states[i].offset, active_states[i].count);
                    606:   printf("\n");
                    607: #endif
                    608: 
                    609:   /* Set the pointers for adding new states */
                    610: 
                    611:   next_active_state = active_states + active_count;
                    612:   next_new_state = new_states;
                    613: 
                    614:   /* Load the current character from the subject outside the loop, as many
                    615:   different states may want to look at it, and we assume that at least one
                    616:   will. */
                    617: 
                    618:   if (ptr < end_subject)
                    619:     {
1.1.1.3   misho     620:     clen = 1;        /* Number of data items in the character */
1.1.1.2   misho     621: #ifdef SUPPORT_UTF
1.1.1.4   misho     622:     GETCHARLENTEST(c, ptr, clen);
                    623: #else
1.1       misho     624:     c = *ptr;
1.1.1.4   misho     625: #endif  /* SUPPORT_UTF */
1.1       misho     626:     }
                    627:   else
                    628:     {
                    629:     clen = 0;        /* This indicates the end of the subject */
                    630:     c = NOTACHAR;    /* This value should never actually be used */
                    631:     }
                    632: 
                    633:   /* Scan up the active states and act on each one. The result of an action
                    634:   may be to add more states to the currently active list (e.g. on hitting a
                    635:   parenthesis) or it may be to put states on the new list, for considering
                    636:   when we move the character pointer on. */
                    637: 
                    638:   for (i = 0; i < active_count; i++)
                    639:     {
                    640:     stateblock *current_state = active_states + i;
                    641:     BOOL caseless = FALSE;
1.1.1.2   misho     642:     const pcre_uchar *code;
1.1       misho     643:     int state_offset = current_state->offset;
1.1.1.4   misho     644:     int codevalue, rrc;
                    645:     int count;
1.1       misho     646: 
                    647: #ifdef PCRE_DEBUG
                    648:     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
                    649:     if (clen == 0) printf("EOL\n");
                    650:       else if (c > 32 && c < 127) printf("'%c'\n", c);
                    651:         else printf("0x%02x\n", c);
                    652: #endif
                    653: 
                    654:     /* A negative offset is a special case meaning "hold off going to this
                    655:     (negated) state until the number of characters in the data field have
1.1.1.3   misho     656:     been skipped". If the could_continue flag was passed over from a previous
                    657:     state, arrange for it to be passed on. */
1.1       misho     658: 
                    659:     if (state_offset < 0)
                    660:       {
                    661:       if (current_state->data > 0)
                    662:         {
                    663:         DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
                    664:         ADD_NEW_DATA(state_offset, current_state->count,
                    665:           current_state->data - 1);
1.1.1.3   misho     666:         if (could_continue) reset_could_continue = TRUE;
1.1       misho     667:         continue;
                    668:         }
                    669:       else
                    670:         {
                    671:         current_state->offset = state_offset = -state_offset;
                    672:         }
                    673:       }
                    674: 
                    675:     /* Check for a duplicate state with the same count, and skip if found.
                    676:     See the note at the head of this module about the possibility of improving
                    677:     performance here. */
                    678: 
                    679:     for (j = 0; j < i; j++)
                    680:       {
                    681:       if (active_states[j].offset == state_offset &&
                    682:           active_states[j].count == current_state->count)
                    683:         {
                    684:         DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
                    685:         goto NEXT_ACTIVE_STATE;
                    686:         }
                    687:       }
                    688: 
                    689:     /* The state offset is the offset to the opcode */
                    690: 
                    691:     code = start_code + state_offset;
                    692:     codevalue = *code;
                    693: 
                    694:     /* If this opcode inspects a character, but we are at the end of the
                    695:     subject, remember the fact for use when testing for a partial match. */
                    696: 
                    697:     if (clen == 0 && poptable[codevalue] != 0)
                    698:       could_continue = TRUE;
                    699: 
                    700:     /* If this opcode is followed by an inline character, load it. It is
                    701:     tempting to test for the presence of a subject character here, but that
                    702:     is wrong, because sometimes zero repetitions of the subject are
                    703:     permitted.
                    704: 
                    705:     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
1.1.1.3   misho     706:     argument that is not a data character - but is always one byte long because
                    707:     the values are small. We have to take special action to deal with  \P, \p,
                    708:     \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
                    709:     these ones to new opcodes. */
1.1       misho     710: 
                    711:     if (coptable[codevalue] > 0)
                    712:       {
                    713:       dlen = 1;
1.1.1.2   misho     714: #ifdef SUPPORT_UTF
                    715:       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
                    716: #endif  /* SUPPORT_UTF */
1.1       misho     717:       d = code[coptable[codevalue]];
                    718:       if (codevalue >= OP_TYPESTAR)
                    719:         {
                    720:         switch(d)
                    721:           {
                    722:           case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
                    723:           case OP_NOTPROP:
                    724:           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
                    725:           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
                    726:           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
                    727:           case OP_NOT_HSPACE:
                    728:           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
                    729:           case OP_NOT_VSPACE:
                    730:           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
                    731:           default: break;
                    732:           }
                    733:         }
                    734:       }
                    735:     else
                    736:       {
                    737:       dlen = 0;         /* Not strictly necessary, but compilers moan */
                    738:       d = NOTACHAR;     /* if these variables are not set. */
                    739:       }
                    740: 
                    741: 
                    742:     /* Now process the individual opcodes */
                    743: 
                    744:     switch (codevalue)
                    745:       {
                    746: /* ========================================================================== */
                    747:       /* These cases are never obeyed. This is a fudge that causes a compile-
                    748:       time error if the vectors coptable or poptable, which are indexed by
                    749:       opcode, are not the correct length. It seems to be the only way to do
                    750:       such a check at compile time, as the sizeof() operator does not work
                    751:       in the C preprocessor. */
                    752: 
                    753:       case OP_TABLE_LENGTH:
                    754:       case OP_TABLE_LENGTH +
                    755:         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
                    756:          (sizeof(poptable) == OP_TABLE_LENGTH)):
                    757:       break;
                    758: 
                    759: /* ========================================================================== */
                    760:       /* Reached a closing bracket. If not at the end of the pattern, carry
                    761:       on with the next opcode. For repeating opcodes, also add the repeat
                    762:       state. Note that KETRPOS will always be encountered at the end of the
                    763:       subpattern, because the possessive subpattern repeats are always handled
                    764:       using recursive calls. Thus, it never adds any new states.
                    765: 
                    766:       At the end of the (sub)pattern, unless we have an empty string and
                    767:       PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
                    768:       start of the subject, save the match data, shifting up all previous
                    769:       matches so we always have the longest first. */
                    770: 
                    771:       case OP_KET:
                    772:       case OP_KETRMIN:
                    773:       case OP_KETRMAX:
                    774:       case OP_KETRPOS:
                    775:       if (code != end_code)
                    776:         {
                    777:         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
                    778:         if (codevalue != OP_KET)
                    779:           {
                    780:           ADD_ACTIVE(state_offset - GET(code, 1), 0);
                    781:           }
                    782:         }
                    783:       else
                    784:         {
                    785:         if (ptr > current_subject ||
                    786:             ((md->moptions & PCRE_NOTEMPTY) == 0 &&
                    787:               ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
                    788:                 current_subject > start_subject + md->start_offset)))
                    789:           {
                    790:           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
                    791:             else if (match_count > 0 && ++match_count * 2 > offsetcount)
                    792:               match_count = 0;
                    793:           count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
                    794:           if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
                    795:           if (offsetcount >= 2)
                    796:             {
                    797:             offsets[0] = (int)(current_subject - start_subject);
                    798:             offsets[1] = (int)(ptr - start_subject);
                    799:             DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
1.1.1.3   misho     800:               offsets[1] - offsets[0], (char *)current_subject));
1.1       misho     801:             }
                    802:           if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
                    803:             {
                    804:             DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                    805:               "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
                    806:               match_count, rlevel*2-2, SP));
                    807:             return match_count;
                    808:             }
                    809:           }
                    810:         }
                    811:       break;
                    812: 
                    813: /* ========================================================================== */
                    814:       /* These opcodes add to the current list of states without looking
                    815:       at the current character. */
                    816: 
                    817:       /*-----------------------------------------------------------------*/
                    818:       case OP_ALT:
                    819:       do { code += GET(code, 1); } while (*code == OP_ALT);
                    820:       ADD_ACTIVE((int)(code - start_code), 0);
                    821:       break;
                    822: 
                    823:       /*-----------------------------------------------------------------*/
                    824:       case OP_BRA:
                    825:       case OP_SBRA:
                    826:       do
                    827:         {
                    828:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    829:         code += GET(code, 1);
                    830:         }
                    831:       while (*code == OP_ALT);
                    832:       break;
                    833: 
                    834:       /*-----------------------------------------------------------------*/
                    835:       case OP_CBRA:
                    836:       case OP_SCBRA:
1.1.1.2   misho     837:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
1.1       misho     838:       code += GET(code, 1);
                    839:       while (*code == OP_ALT)
                    840:         {
                    841:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
                    842:         code += GET(code, 1);
                    843:         }
                    844:       break;
                    845: 
                    846:       /*-----------------------------------------------------------------*/
                    847:       case OP_BRAZERO:
                    848:       case OP_BRAMINZERO:
                    849:       ADD_ACTIVE(state_offset + 1, 0);
                    850:       code += 1 + GET(code, 2);
                    851:       while (*code == OP_ALT) code += GET(code, 1);
                    852:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    853:       break;
                    854: 
                    855:       /*-----------------------------------------------------------------*/
                    856:       case OP_SKIPZERO:
                    857:       code += 1 + GET(code, 2);
                    858:       while (*code == OP_ALT) code += GET(code, 1);
                    859:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    860:       break;
                    861: 
                    862:       /*-----------------------------------------------------------------*/
                    863:       case OP_CIRC:
                    864:       if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
                    865:         { ADD_ACTIVE(state_offset + 1, 0); }
                    866:       break;
                    867: 
                    868:       /*-----------------------------------------------------------------*/
                    869:       case OP_CIRCM:
                    870:       if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
                    871:           (ptr != end_subject && WAS_NEWLINE(ptr)))
                    872:         { ADD_ACTIVE(state_offset + 1, 0); }
                    873:       break;
                    874: 
                    875:       /*-----------------------------------------------------------------*/
                    876:       case OP_EOD:
                    877:       if (ptr >= end_subject)
                    878:         {
                    879:         if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    880:           could_continue = TRUE;
                    881:         else { ADD_ACTIVE(state_offset + 1, 0); }
                    882:         }
                    883:       break;
                    884: 
                    885:       /*-----------------------------------------------------------------*/
                    886:       case OP_SOD:
                    887:       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
                    888:       break;
                    889: 
                    890:       /*-----------------------------------------------------------------*/
                    891:       case OP_SOM:
                    892:       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
                    893:       break;
                    894: 
                    895: 
                    896: /* ========================================================================== */
                    897:       /* These opcodes inspect the next subject character, and sometimes
                    898:       the previous one as well, but do not have an argument. The variable
                    899:       clen contains the length of the current character and is zero if we are
                    900:       at the end of the subject. */
                    901: 
                    902:       /*-----------------------------------------------------------------*/
                    903:       case OP_ANY:
                    904:       if (clen > 0 && !IS_NEWLINE(ptr))
1.1.1.3   misho     905:         {
                    906:         if (ptr + 1 >= md->end_subject &&
                    907:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                    908:             NLBLOCK->nltype == NLTYPE_FIXED &&
                    909:             NLBLOCK->nllen == 2 &&
                    910:             c == NLBLOCK->nl[0])
                    911:           {
                    912:           could_continue = partial_newline = TRUE;
                    913:           }
                    914:         else
                    915:           {
                    916:           ADD_NEW(state_offset + 1, 0);
                    917:           }
                    918:         }
1.1       misho     919:       break;
                    920: 
                    921:       /*-----------------------------------------------------------------*/
                    922:       case OP_ALLANY:
                    923:       if (clen > 0)
                    924:         { ADD_NEW(state_offset + 1, 0); }
                    925:       break;
                    926: 
                    927:       /*-----------------------------------------------------------------*/
                    928:       case OP_EODN:
                    929:       if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    930:         could_continue = TRUE;
                    931:       else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
                    932:         { ADD_ACTIVE(state_offset + 1, 0); }
                    933:       break;
                    934: 
                    935:       /*-----------------------------------------------------------------*/
                    936:       case OP_DOLL:
                    937:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    938:         {
                    939:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    940:           could_continue = TRUE;
                    941:         else if (clen == 0 ||
                    942:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
                    943:                (ptr == end_subject - md->nllen)
                    944:             ))
                    945:           { ADD_ACTIVE(state_offset + 1, 0); }
1.1.1.3   misho     946:         else if (ptr + 1 >= md->end_subject &&
                    947:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
                    948:                  NLBLOCK->nltype == NLTYPE_FIXED &&
                    949:                  NLBLOCK->nllen == 2 &&
                    950:                  c == NLBLOCK->nl[0])
                    951:           {
                    952:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    953:             {
                    954:             reset_could_continue = TRUE;
                    955:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                    956:             }
                    957:           else could_continue = partial_newline = TRUE;
                    958:           }
1.1       misho     959:         }
                    960:       break;
                    961: 
                    962:       /*-----------------------------------------------------------------*/
                    963:       case OP_DOLLM:
                    964:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    965:         {
                    966:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    967:           could_continue = TRUE;
                    968:         else if (clen == 0 ||
                    969:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
                    970:           { ADD_ACTIVE(state_offset + 1, 0); }
1.1.1.3   misho     971:         else if (ptr + 1 >= md->end_subject &&
                    972:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
                    973:                  NLBLOCK->nltype == NLTYPE_FIXED &&
                    974:                  NLBLOCK->nllen == 2 &&
                    975:                  c == NLBLOCK->nl[0])
                    976:           {
                    977:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    978:             {
                    979:             reset_could_continue = TRUE;
                    980:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                    981:             }
                    982:           else could_continue = partial_newline = TRUE;
                    983:           }
1.1       misho     984:         }
                    985:       else if (IS_NEWLINE(ptr))
                    986:         { ADD_ACTIVE(state_offset + 1, 0); }
                    987:       break;
                    988: 
                    989:       /*-----------------------------------------------------------------*/
                    990: 
                    991:       case OP_DIGIT:
                    992:       case OP_WHITESPACE:
                    993:       case OP_WORDCHAR:
                    994:       if (clen > 0 && c < 256 &&
                    995:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
                    996:         { ADD_NEW(state_offset + 1, 0); }
                    997:       break;
                    998: 
                    999:       /*-----------------------------------------------------------------*/
                   1000:       case OP_NOT_DIGIT:
                   1001:       case OP_NOT_WHITESPACE:
                   1002:       case OP_NOT_WORDCHAR:
                   1003:       if (clen > 0 && (c >= 256 ||
                   1004:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
                   1005:         { ADD_NEW(state_offset + 1, 0); }
                   1006:       break;
                   1007: 
                   1008:       /*-----------------------------------------------------------------*/
                   1009:       case OP_WORD_BOUNDARY:
                   1010:       case OP_NOT_WORD_BOUNDARY:
                   1011:         {
                   1012:         int left_word, right_word;
                   1013: 
                   1014:         if (ptr > start_subject)
                   1015:           {
1.1.1.2   misho    1016:           const pcre_uchar *temp = ptr - 1;
1.1       misho    1017:           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1.1.1.4   misho    1018: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.2   misho    1019:           if (utf) { BACKCHAR(temp); }
1.1       misho    1020: #endif
                   1021:           GETCHARTEST(d, temp);
                   1022: #ifdef SUPPORT_UCP
                   1023:           if ((md->poptions & PCRE_UCP) != 0)
                   1024:             {
                   1025:             if (d == '_') left_word = TRUE; else
                   1026:               {
                   1027:               int cat = UCD_CATEGORY(d);
                   1028:               left_word = (cat == ucp_L || cat == ucp_N);
                   1029:               }
                   1030:             }
                   1031:           else
                   1032: #endif
                   1033:           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
                   1034:           }
                   1035:         else left_word = FALSE;
                   1036: 
                   1037:         if (clen > 0)
                   1038:           {
                   1039: #ifdef SUPPORT_UCP
                   1040:           if ((md->poptions & PCRE_UCP) != 0)
                   1041:             {
                   1042:             if (c == '_') right_word = TRUE; else
                   1043:               {
                   1044:               int cat = UCD_CATEGORY(c);
                   1045:               right_word = (cat == ucp_L || cat == ucp_N);
                   1046:               }
                   1047:             }
                   1048:           else
                   1049: #endif
                   1050:           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
                   1051:           }
                   1052:         else right_word = FALSE;
                   1053: 
                   1054:         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
                   1055:           { ADD_ACTIVE(state_offset + 1, 0); }
                   1056:         }
                   1057:       break;
                   1058: 
                   1059: 
                   1060:       /*-----------------------------------------------------------------*/
                   1061:       /* Check the next character by Unicode property. We will get here only
                   1062:       if the support is in the binary; otherwise a compile-time error occurs.
                   1063:       */
                   1064: 
                   1065: #ifdef SUPPORT_UCP
                   1066:       case OP_PROP:
                   1067:       case OP_NOTPROP:
                   1068:       if (clen > 0)
                   1069:         {
                   1070:         BOOL OK;
1.1.1.4   misho    1071:         const pcre_uint32 *cp;
1.1       misho    1072:         const ucd_record * prop = GET_UCD(c);
                   1073:         switch(code[1])
                   1074:           {
                   1075:           case PT_ANY:
                   1076:           OK = TRUE;
                   1077:           break;
                   1078: 
                   1079:           case PT_LAMP:
                   1080:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1081:                prop->chartype == ucp_Lt;
                   1082:           break;
                   1083: 
                   1084:           case PT_GC:
1.1.1.2   misho    1085:           OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1.1       misho    1086:           break;
                   1087: 
                   1088:           case PT_PC:
                   1089:           OK = prop->chartype == code[2];
                   1090:           break;
                   1091: 
                   1092:           case PT_SC:
                   1093:           OK = prop->script == code[2];
                   1094:           break;
                   1095: 
                   1096:           /* These are specials for combination cases. */
                   1097: 
                   1098:           case PT_ALNUM:
1.1.1.2   misho    1099:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1100:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1101:           break;
                   1102: 
1.1.1.5 ! misho    1103:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
        !          1104:           which means that Perl space and POSIX space are now identical. PCRE
        !          1105:           was changed at release 8.34. */
1.1       misho    1106: 
1.1.1.5 ! misho    1107:           case PT_SPACE:    /* Perl space */
1.1       misho    1108:           case PT_PXSPACE:  /* POSIX space */
1.1.1.5 ! misho    1109:           switch(c)
        !          1110:             {
        !          1111:             HSPACE_CASES:
        !          1112:             VSPACE_CASES:
        !          1113:             OK = TRUE;
        !          1114:             break;
        !          1115: 
        !          1116:             default:
        !          1117:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
        !          1118:             break;
        !          1119:             }
1.1       misho    1120:           break;
                   1121: 
                   1122:           case PT_WORD:
1.1.1.2   misho    1123:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1124:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1125:                c == CHAR_UNDERSCORE;
                   1126:           break;
                   1127: 
1.1.1.4   misho    1128:           case PT_CLIST:
                   1129:           cp = PRIV(ucd_caseless_sets) + code[2];
                   1130:           for (;;)
                   1131:             {
                   1132:             if (c < *cp) { OK = FALSE; break; }
                   1133:             if (c == *cp++) { OK = TRUE; break; }
                   1134:             }
                   1135:           break;
                   1136: 
                   1137:           case PT_UCNC:
                   1138:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   1139:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   1140:                c >= 0xe000;
                   1141:           break;
                   1142: 
1.1       misho    1143:           /* Should never occur, but keep compilers from grumbling. */
                   1144: 
                   1145:           default:
                   1146:           OK = codevalue != OP_PROP;
                   1147:           break;
                   1148:           }
                   1149: 
                   1150:         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
                   1151:         }
                   1152:       break;
                   1153: #endif
                   1154: 
                   1155: 
                   1156: 
                   1157: /* ========================================================================== */
                   1158:       /* These opcodes likewise inspect the subject character, but have an
                   1159:       argument that is not a data character. It is one of these opcodes:
                   1160:       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
                   1161:       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
                   1162: 
                   1163:       case OP_TYPEPLUS:
                   1164:       case OP_TYPEMINPLUS:
                   1165:       case OP_TYPEPOSPLUS:
                   1166:       count = current_state->count;  /* Already matched */
                   1167:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1168:       if (clen > 0)
                   1169:         {
1.1.1.3   misho    1170:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1171:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1172:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1173:             NLBLOCK->nllen == 2 &&
                   1174:             c == NLBLOCK->nl[0])
                   1175:           {
                   1176:           could_continue = partial_newline = TRUE;
                   1177:           }
                   1178:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1179:             (c < 256 &&
                   1180:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1181:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1182:           {
                   1183:           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
                   1184:             {
                   1185:             active_count--;            /* Remove non-match possibility */
                   1186:             next_active_state--;
                   1187:             }
                   1188:           count++;
                   1189:           ADD_NEW(state_offset, count);
                   1190:           }
                   1191:         }
                   1192:       break;
                   1193: 
                   1194:       /*-----------------------------------------------------------------*/
                   1195:       case OP_TYPEQUERY:
                   1196:       case OP_TYPEMINQUERY:
                   1197:       case OP_TYPEPOSQUERY:
                   1198:       ADD_ACTIVE(state_offset + 2, 0);
                   1199:       if (clen > 0)
                   1200:         {
1.1.1.3   misho    1201:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1202:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1203:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1204:             NLBLOCK->nllen == 2 &&
                   1205:             c == NLBLOCK->nl[0])
                   1206:           {
                   1207:           could_continue = partial_newline = TRUE;
                   1208:           }
                   1209:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1210:             (c < 256 &&
                   1211:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1212:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1213:           {
                   1214:           if (codevalue == OP_TYPEPOSQUERY)
                   1215:             {
                   1216:             active_count--;            /* Remove non-match possibility */
                   1217:             next_active_state--;
                   1218:             }
                   1219:           ADD_NEW(state_offset + 2, 0);
                   1220:           }
                   1221:         }
                   1222:       break;
                   1223: 
                   1224:       /*-----------------------------------------------------------------*/
                   1225:       case OP_TYPESTAR:
                   1226:       case OP_TYPEMINSTAR:
                   1227:       case OP_TYPEPOSSTAR:
                   1228:       ADD_ACTIVE(state_offset + 2, 0);
                   1229:       if (clen > 0)
                   1230:         {
1.1.1.3   misho    1231:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1232:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1233:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1234:             NLBLOCK->nllen == 2 &&
                   1235:             c == NLBLOCK->nl[0])
                   1236:           {
                   1237:           could_continue = partial_newline = TRUE;
                   1238:           }
                   1239:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1240:             (c < 256 &&
                   1241:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1242:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1243:           {
                   1244:           if (codevalue == OP_TYPEPOSSTAR)
                   1245:             {
                   1246:             active_count--;            /* Remove non-match possibility */
                   1247:             next_active_state--;
                   1248:             }
                   1249:           ADD_NEW(state_offset, 0);
                   1250:           }
                   1251:         }
                   1252:       break;
                   1253: 
                   1254:       /*-----------------------------------------------------------------*/
                   1255:       case OP_TYPEEXACT:
                   1256:       count = current_state->count;  /* Number already matched */
                   1257:       if (clen > 0)
                   1258:         {
1.1.1.3   misho    1259:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1260:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1261:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1262:             NLBLOCK->nllen == 2 &&
                   1263:             c == NLBLOCK->nl[0])
                   1264:           {
                   1265:           could_continue = partial_newline = TRUE;
                   1266:           }
                   1267:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1268:             (c < 256 &&
                   1269:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1270:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1271:           {
1.1.1.4   misho    1272:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1273:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1.1       misho    1274:           else
                   1275:             { ADD_NEW(state_offset, count); }
                   1276:           }
                   1277:         }
                   1278:       break;
                   1279: 
                   1280:       /*-----------------------------------------------------------------*/
                   1281:       case OP_TYPEUPTO:
                   1282:       case OP_TYPEMINUPTO:
                   1283:       case OP_TYPEPOSUPTO:
1.1.1.2   misho    1284:       ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1.1       misho    1285:       count = current_state->count;  /* Number already matched */
                   1286:       if (clen > 0)
                   1287:         {
1.1.1.3   misho    1288:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1289:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1290:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1291:             NLBLOCK->nllen == 2 &&
                   1292:             c == NLBLOCK->nl[0])
                   1293:           {
                   1294:           could_continue = partial_newline = TRUE;
                   1295:           }
                   1296:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1297:             (c < 256 &&
                   1298:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1299:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1300:           {
                   1301:           if (codevalue == OP_TYPEPOSUPTO)
                   1302:             {
                   1303:             active_count--;           /* Remove non-match possibility */
                   1304:             next_active_state--;
                   1305:             }
1.1.1.4   misho    1306:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1307:             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1308:           else
                   1309:             { ADD_NEW(state_offset, count); }
                   1310:           }
                   1311:         }
                   1312:       break;
                   1313: 
                   1314: /* ========================================================================== */
                   1315:       /* These are virtual opcodes that are used when something like
                   1316:       OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
                   1317:       argument. It keeps the code above fast for the other cases. The argument
                   1318:       is in the d variable. */
                   1319: 
                   1320: #ifdef SUPPORT_UCP
                   1321:       case OP_PROP_EXTRA + OP_TYPEPLUS:
                   1322:       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
                   1323:       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
                   1324:       count = current_state->count;           /* Already matched */
                   1325:       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
                   1326:       if (clen > 0)
                   1327:         {
                   1328:         BOOL OK;
1.1.1.4   misho    1329:         const pcre_uint32 *cp;
1.1       misho    1330:         const ucd_record * prop = GET_UCD(c);
                   1331:         switch(code[2])
                   1332:           {
                   1333:           case PT_ANY:
                   1334:           OK = TRUE;
                   1335:           break;
                   1336: 
                   1337:           case PT_LAMP:
                   1338:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1339:             prop->chartype == ucp_Lt;
                   1340:           break;
                   1341: 
                   1342:           case PT_GC:
1.1.1.2   misho    1343:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1.1       misho    1344:           break;
                   1345: 
                   1346:           case PT_PC:
                   1347:           OK = prop->chartype == code[3];
                   1348:           break;
                   1349: 
                   1350:           case PT_SC:
                   1351:           OK = prop->script == code[3];
                   1352:           break;
                   1353: 
                   1354:           /* These are specials for combination cases. */
                   1355: 
                   1356:           case PT_ALNUM:
1.1.1.2   misho    1357:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1358:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1359:           break;
                   1360: 
1.1.1.5 ! misho    1361:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
        !          1362:           which means that Perl space and POSIX space are now identical. PCRE
        !          1363:           was changed at release 8.34. */
1.1       misho    1364: 
1.1.1.5 ! misho    1365:           case PT_SPACE:    /* Perl space */
1.1       misho    1366:           case PT_PXSPACE:  /* POSIX space */
1.1.1.5 ! misho    1367:           switch(c)
        !          1368:             {
        !          1369:             HSPACE_CASES:
        !          1370:             VSPACE_CASES:
        !          1371:             OK = TRUE;
        !          1372:             break;
        !          1373: 
        !          1374:             default:
        !          1375:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
        !          1376:             break;
        !          1377:             }
1.1       misho    1378:           break;
                   1379: 
                   1380:           case PT_WORD:
1.1.1.2   misho    1381:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1382:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1383:                c == CHAR_UNDERSCORE;
                   1384:           break;
                   1385: 
1.1.1.4   misho    1386:           case PT_CLIST:
                   1387:           cp = PRIV(ucd_caseless_sets) + code[3];
                   1388:           for (;;)
                   1389:             {
                   1390:             if (c < *cp) { OK = FALSE; break; }
                   1391:             if (c == *cp++) { OK = TRUE; break; }
                   1392:             }
                   1393:           break;
                   1394: 
                   1395:           case PT_UCNC:
                   1396:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   1397:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   1398:                c >= 0xe000;
                   1399:           break;
                   1400: 
1.1       misho    1401:           /* Should never occur, but keep compilers from grumbling. */
                   1402: 
                   1403:           default:
                   1404:           OK = codevalue != OP_PROP;
                   1405:           break;
                   1406:           }
                   1407: 
                   1408:         if (OK == (d == OP_PROP))
                   1409:           {
                   1410:           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
                   1411:             {
                   1412:             active_count--;           /* Remove non-match possibility */
                   1413:             next_active_state--;
                   1414:             }
                   1415:           count++;
                   1416:           ADD_NEW(state_offset, count);
                   1417:           }
                   1418:         }
                   1419:       break;
                   1420: 
                   1421:       /*-----------------------------------------------------------------*/
                   1422:       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
                   1423:       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
                   1424:       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
                   1425:       count = current_state->count;  /* Already matched */
                   1426:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1.1.1.4   misho    1427:       if (clen > 0)
1.1       misho    1428:         {
1.1.1.4   misho    1429:         int lgb, rgb;
1.1.1.2   misho    1430:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1431:         int ncount = 0;
                   1432:         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
                   1433:           {
                   1434:           active_count--;           /* Remove non-match possibility */
                   1435:           next_active_state--;
                   1436:           }
1.1.1.4   misho    1437:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    1438:         while (nptr < end_subject)
                   1439:           {
1.1.1.4   misho    1440:           dlen = 1;
                   1441:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
                   1442:           rgb = UCD_GRAPHBREAK(d);
                   1443:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    1444:           ncount++;
1.1.1.4   misho    1445:           lgb = rgb;
                   1446:           nptr += dlen;
1.1       misho    1447:           }
                   1448:         count++;
                   1449:         ADD_NEW_DATA(-state_offset, count, ncount);
                   1450:         }
                   1451:       break;
                   1452: #endif
                   1453: 
                   1454:       /*-----------------------------------------------------------------*/
                   1455:       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
                   1456:       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
                   1457:       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
                   1458:       count = current_state->count;  /* Already matched */
                   1459:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1460:       if (clen > 0)
                   1461:         {
                   1462:         int ncount = 0;
                   1463:         switch (c)
                   1464:           {
1.1.1.4   misho    1465:           case CHAR_VT:
                   1466:           case CHAR_FF:
                   1467:           case CHAR_NEL:
                   1468: #ifndef EBCDIC
1.1       misho    1469:           case 0x2028:
                   1470:           case 0x2029:
1.1.1.4   misho    1471: #endif  /* Not EBCDIC */
1.1       misho    1472:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1473:           goto ANYNL01;
                   1474: 
1.1.1.4   misho    1475:           case CHAR_CR:
                   1476:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1.1       misho    1477:           /* Fall through */
                   1478: 
                   1479:           ANYNL01:
1.1.1.4   misho    1480:           case CHAR_LF:
1.1       misho    1481:           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
                   1482:             {
                   1483:             active_count--;           /* Remove non-match possibility */
                   1484:             next_active_state--;
                   1485:             }
                   1486:           count++;
                   1487:           ADD_NEW_DATA(-state_offset, count, ncount);
                   1488:           break;
                   1489: 
                   1490:           default:
                   1491:           break;
                   1492:           }
                   1493:         }
                   1494:       break;
                   1495: 
                   1496:       /*-----------------------------------------------------------------*/
                   1497:       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
                   1498:       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
                   1499:       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1500:       count = current_state->count;  /* Already matched */
                   1501:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1502:       if (clen > 0)
                   1503:         {
                   1504:         BOOL OK;
                   1505:         switch (c)
                   1506:           {
1.1.1.4   misho    1507:           VSPACE_CASES:
1.1       misho    1508:           OK = TRUE;
                   1509:           break;
                   1510: 
                   1511:           default:
                   1512:           OK = FALSE;
                   1513:           break;
                   1514:           }
                   1515: 
                   1516:         if (OK == (d == OP_VSPACE))
                   1517:           {
                   1518:           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1519:             {
                   1520:             active_count--;           /* Remove non-match possibility */
                   1521:             next_active_state--;
                   1522:             }
                   1523:           count++;
                   1524:           ADD_NEW_DATA(-state_offset, count, 0);
                   1525:           }
                   1526:         }
                   1527:       break;
                   1528: 
                   1529:       /*-----------------------------------------------------------------*/
                   1530:       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
                   1531:       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
                   1532:       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1533:       count = current_state->count;  /* Already matched */
                   1534:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1535:       if (clen > 0)
                   1536:         {
                   1537:         BOOL OK;
                   1538:         switch (c)
                   1539:           {
1.1.1.4   misho    1540:           HSPACE_CASES:
1.1       misho    1541:           OK = TRUE;
                   1542:           break;
                   1543: 
                   1544:           default:
                   1545:           OK = FALSE;
                   1546:           break;
                   1547:           }
                   1548: 
                   1549:         if (OK == (d == OP_HSPACE))
                   1550:           {
                   1551:           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1552:             {
                   1553:             active_count--;           /* Remove non-match possibility */
                   1554:             next_active_state--;
                   1555:             }
                   1556:           count++;
                   1557:           ADD_NEW_DATA(-state_offset, count, 0);
                   1558:           }
                   1559:         }
                   1560:       break;
                   1561: 
                   1562:       /*-----------------------------------------------------------------*/
                   1563: #ifdef SUPPORT_UCP
                   1564:       case OP_PROP_EXTRA + OP_TYPEQUERY:
                   1565:       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
                   1566:       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
                   1567:       count = 4;
                   1568:       goto QS1;
                   1569: 
                   1570:       case OP_PROP_EXTRA + OP_TYPESTAR:
                   1571:       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
                   1572:       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
                   1573:       count = 0;
                   1574: 
                   1575:       QS1:
                   1576: 
                   1577:       ADD_ACTIVE(state_offset + 4, 0);
                   1578:       if (clen > 0)
                   1579:         {
                   1580:         BOOL OK;
1.1.1.4   misho    1581:         const pcre_uint32 *cp;
1.1       misho    1582:         const ucd_record * prop = GET_UCD(c);
                   1583:         switch(code[2])
                   1584:           {
                   1585:           case PT_ANY:
                   1586:           OK = TRUE;
                   1587:           break;
                   1588: 
                   1589:           case PT_LAMP:
                   1590:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1591:             prop->chartype == ucp_Lt;
                   1592:           break;
                   1593: 
                   1594:           case PT_GC:
1.1.1.2   misho    1595:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1.1       misho    1596:           break;
                   1597: 
                   1598:           case PT_PC:
                   1599:           OK = prop->chartype == code[3];
                   1600:           break;
                   1601: 
                   1602:           case PT_SC:
                   1603:           OK = prop->script == code[3];
                   1604:           break;
                   1605: 
                   1606:           /* These are specials for combination cases. */
                   1607: 
                   1608:           case PT_ALNUM:
1.1.1.2   misho    1609:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1610:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1611:           break;
                   1612: 
1.1.1.5 ! misho    1613:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
        !          1614:           which means that Perl space and POSIX space are now identical. PCRE
        !          1615:           was changed at release 8.34. */
1.1       misho    1616: 
1.1.1.5 ! misho    1617:           case PT_SPACE:    /* Perl space */
1.1       misho    1618:           case PT_PXSPACE:  /* POSIX space */
1.1.1.5 ! misho    1619:           switch(c)
        !          1620:             {
        !          1621:             HSPACE_CASES:
        !          1622:             VSPACE_CASES:
        !          1623:             OK = TRUE;
        !          1624:             break;
        !          1625: 
        !          1626:             default:
        !          1627:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
        !          1628:             break;
        !          1629:             }
1.1       misho    1630:           break;
                   1631: 
                   1632:           case PT_WORD:
1.1.1.2   misho    1633:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1634:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1635:                c == CHAR_UNDERSCORE;
                   1636:           break;
                   1637: 
1.1.1.4   misho    1638:           case PT_CLIST:
                   1639:           cp = PRIV(ucd_caseless_sets) + code[3];
                   1640:           for (;;)
                   1641:             {
                   1642:             if (c < *cp) { OK = FALSE; break; }
                   1643:             if (c == *cp++) { OK = TRUE; break; }
                   1644:             }
                   1645:           break;
                   1646: 
                   1647:           case PT_UCNC:
                   1648:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   1649:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   1650:                c >= 0xe000;
                   1651:           break;
                   1652: 
1.1       misho    1653:           /* Should never occur, but keep compilers from grumbling. */
                   1654: 
                   1655:           default:
                   1656:           OK = codevalue != OP_PROP;
                   1657:           break;
                   1658:           }
                   1659: 
                   1660:         if (OK == (d == OP_PROP))
                   1661:           {
                   1662:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
                   1663:               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
                   1664:             {
                   1665:             active_count--;           /* Remove non-match possibility */
                   1666:             next_active_state--;
                   1667:             }
                   1668:           ADD_NEW(state_offset + count, 0);
                   1669:           }
                   1670:         }
                   1671:       break;
                   1672: 
                   1673:       /*-----------------------------------------------------------------*/
                   1674:       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
                   1675:       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
                   1676:       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
                   1677:       count = 2;
                   1678:       goto QS2;
                   1679: 
                   1680:       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
                   1681:       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
                   1682:       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
                   1683:       count = 0;
                   1684: 
                   1685:       QS2:
                   1686: 
                   1687:       ADD_ACTIVE(state_offset + 2, 0);
1.1.1.4   misho    1688:       if (clen > 0)
1.1       misho    1689:         {
1.1.1.4   misho    1690:         int lgb, rgb;
1.1.1.2   misho    1691:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1692:         int ncount = 0;
                   1693:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
                   1694:             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
                   1695:           {
                   1696:           active_count--;           /* Remove non-match possibility */
                   1697:           next_active_state--;
                   1698:           }
1.1.1.4   misho    1699:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    1700:         while (nptr < end_subject)
                   1701:           {
1.1.1.4   misho    1702:           dlen = 1;
                   1703:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
                   1704:           rgb = UCD_GRAPHBREAK(d);
                   1705:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    1706:           ncount++;
1.1.1.4   misho    1707:           lgb = rgb;
                   1708:           nptr += dlen;
1.1       misho    1709:           }
                   1710:         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
                   1711:         }
                   1712:       break;
                   1713: #endif
                   1714: 
                   1715:       /*-----------------------------------------------------------------*/
                   1716:       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
                   1717:       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
                   1718:       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
                   1719:       count = 2;
                   1720:       goto QS3;
                   1721: 
                   1722:       case OP_ANYNL_EXTRA + OP_TYPESTAR:
                   1723:       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
                   1724:       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
                   1725:       count = 0;
                   1726: 
                   1727:       QS3:
                   1728:       ADD_ACTIVE(state_offset + 2, 0);
                   1729:       if (clen > 0)
                   1730:         {
                   1731:         int ncount = 0;
                   1732:         switch (c)
                   1733:           {
1.1.1.4   misho    1734:           case CHAR_VT:
                   1735:           case CHAR_FF:
                   1736:           case CHAR_NEL:
                   1737: #ifndef EBCDIC
1.1       misho    1738:           case 0x2028:
                   1739:           case 0x2029:
1.1.1.4   misho    1740: #endif  /* Not EBCDIC */
1.1       misho    1741:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1742:           goto ANYNL02;
                   1743: 
1.1.1.4   misho    1744:           case CHAR_CR:
                   1745:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1.1       misho    1746:           /* Fall through */
                   1747: 
                   1748:           ANYNL02:
1.1.1.4   misho    1749:           case CHAR_LF:
1.1       misho    1750:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
                   1751:               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
                   1752:             {
                   1753:             active_count--;           /* Remove non-match possibility */
                   1754:             next_active_state--;
                   1755:             }
1.1.1.4   misho    1756:           ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1.1       misho    1757:           break;
                   1758: 
                   1759:           default:
                   1760:           break;
                   1761:           }
                   1762:         }
                   1763:       break;
                   1764: 
                   1765:       /*-----------------------------------------------------------------*/
                   1766:       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
                   1767:       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
                   1768:       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1769:       count = 2;
                   1770:       goto QS4;
                   1771: 
                   1772:       case OP_VSPACE_EXTRA + OP_TYPESTAR:
                   1773:       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
                   1774:       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1775:       count = 0;
                   1776: 
                   1777:       QS4:
                   1778:       ADD_ACTIVE(state_offset + 2, 0);
                   1779:       if (clen > 0)
                   1780:         {
                   1781:         BOOL OK;
                   1782:         switch (c)
                   1783:           {
1.1.1.4   misho    1784:           VSPACE_CASES:
1.1       misho    1785:           OK = TRUE;
                   1786:           break;
                   1787: 
                   1788:           default:
                   1789:           OK = FALSE;
                   1790:           break;
                   1791:           }
                   1792:         if (OK == (d == OP_VSPACE))
                   1793:           {
                   1794:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1795:               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1796:             {
                   1797:             active_count--;           /* Remove non-match possibility */
                   1798:             next_active_state--;
                   1799:             }
1.1.1.4   misho    1800:           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1.1       misho    1801:           }
                   1802:         }
                   1803:       break;
                   1804: 
                   1805:       /*-----------------------------------------------------------------*/
                   1806:       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
                   1807:       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
                   1808:       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1809:       count = 2;
                   1810:       goto QS5;
                   1811: 
                   1812:       case OP_HSPACE_EXTRA + OP_TYPESTAR:
                   1813:       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
                   1814:       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1815:       count = 0;
                   1816: 
                   1817:       QS5:
                   1818:       ADD_ACTIVE(state_offset + 2, 0);
                   1819:       if (clen > 0)
                   1820:         {
                   1821:         BOOL OK;
                   1822:         switch (c)
                   1823:           {
1.1.1.4   misho    1824:           HSPACE_CASES:
1.1       misho    1825:           OK = TRUE;
                   1826:           break;
                   1827: 
                   1828:           default:
                   1829:           OK = FALSE;
                   1830:           break;
                   1831:           }
                   1832: 
                   1833:         if (OK == (d == OP_HSPACE))
                   1834:           {
                   1835:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1836:               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1837:             {
                   1838:             active_count--;           /* Remove non-match possibility */
                   1839:             next_active_state--;
                   1840:             }
1.1.1.4   misho    1841:           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1.1       misho    1842:           }
                   1843:         }
                   1844:       break;
                   1845: 
                   1846:       /*-----------------------------------------------------------------*/
                   1847: #ifdef SUPPORT_UCP
                   1848:       case OP_PROP_EXTRA + OP_TYPEEXACT:
                   1849:       case OP_PROP_EXTRA + OP_TYPEUPTO:
                   1850:       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
                   1851:       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
                   1852:       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1853:         { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1.1       misho    1854:       count = current_state->count;  /* Number already matched */
                   1855:       if (clen > 0)
                   1856:         {
                   1857:         BOOL OK;
1.1.1.4   misho    1858:         const pcre_uint32 *cp;
1.1       misho    1859:         const ucd_record * prop = GET_UCD(c);
1.1.1.2   misho    1860:         switch(code[1 + IMM2_SIZE + 1])
1.1       misho    1861:           {
                   1862:           case PT_ANY:
                   1863:           OK = TRUE;
                   1864:           break;
                   1865: 
                   1866:           case PT_LAMP:
                   1867:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1868:             prop->chartype == ucp_Lt;
                   1869:           break;
                   1870: 
                   1871:           case PT_GC:
1.1.1.2   misho    1872:           OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1.1       misho    1873:           break;
                   1874: 
                   1875:           case PT_PC:
1.1.1.2   misho    1876:           OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1.1       misho    1877:           break;
                   1878: 
                   1879:           case PT_SC:
1.1.1.2   misho    1880:           OK = prop->script == code[1 + IMM2_SIZE + 2];
1.1       misho    1881:           break;
                   1882: 
                   1883:           /* These are specials for combination cases. */
                   1884: 
                   1885:           case PT_ALNUM:
1.1.1.2   misho    1886:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1887:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1888:           break;
                   1889: 
1.1.1.5 ! misho    1890:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
        !          1891:           which means that Perl space and POSIX space are now identical. PCRE
        !          1892:           was changed at release 8.34. */
1.1       misho    1893: 
1.1.1.5 ! misho    1894:           case PT_SPACE:    /* Perl space */
1.1       misho    1895:           case PT_PXSPACE:  /* POSIX space */
1.1.1.5 ! misho    1896:           switch(c)
        !          1897:             {
        !          1898:             HSPACE_CASES:
        !          1899:             VSPACE_CASES:
        !          1900:             OK = TRUE;
        !          1901:             break;
        !          1902: 
        !          1903:             default:
        !          1904:             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
        !          1905:             break;
        !          1906:             }
1.1       misho    1907:           break;
                   1908: 
                   1909:           case PT_WORD:
1.1.1.2   misho    1910:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1911:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1912:                c == CHAR_UNDERSCORE;
                   1913:           break;
                   1914: 
1.1.1.4   misho    1915:           case PT_CLIST:
                   1916:           cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
                   1917:           for (;;)
                   1918:             {
                   1919:             if (c < *cp) { OK = FALSE; break; }
                   1920:             if (c == *cp++) { OK = TRUE; break; }
                   1921:             }
                   1922:           break;
                   1923: 
                   1924:           case PT_UCNC:
                   1925:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   1926:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   1927:                c >= 0xe000;
                   1928:           break;
                   1929: 
1.1       misho    1930:           /* Should never occur, but keep compilers from grumbling. */
                   1931: 
                   1932:           default:
                   1933:           OK = codevalue != OP_PROP;
                   1934:           break;
                   1935:           }
                   1936: 
                   1937:         if (OK == (d == OP_PROP))
                   1938:           {
                   1939:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
                   1940:             {
                   1941:             active_count--;           /* Remove non-match possibility */
                   1942:             next_active_state--;
                   1943:             }
1.1.1.4   misho    1944:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1945:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1.1       misho    1946:           else
                   1947:             { ADD_NEW(state_offset, count); }
                   1948:           }
                   1949:         }
                   1950:       break;
                   1951: 
                   1952:       /*-----------------------------------------------------------------*/
                   1953:       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
                   1954:       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
                   1955:       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
                   1956:       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
                   1957:       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1958:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1959:       count = current_state->count;  /* Number already matched */
1.1.1.4   misho    1960:       if (clen > 0)
1.1       misho    1961:         {
1.1.1.4   misho    1962:         int lgb, rgb;
1.1.1.2   misho    1963:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1964:         int ncount = 0;
                   1965:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
                   1966:           {
                   1967:           active_count--;           /* Remove non-match possibility */
                   1968:           next_active_state--;
                   1969:           }
1.1.1.4   misho    1970:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    1971:         while (nptr < end_subject)
                   1972:           {
1.1.1.4   misho    1973:           dlen = 1;
                   1974:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
                   1975:           rgb = UCD_GRAPHBREAK(d);
                   1976:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    1977:           ncount++;
1.1.1.4   misho    1978:           lgb = rgb;
                   1979:           nptr += dlen;
1.1       misho    1980:           }
1.1.1.3   misho    1981:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                   1982:             reset_could_continue = TRUE;
1.1.1.4   misho    1983:         if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1984:           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1.1       misho    1985:         else
                   1986:           { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1987:         }
                   1988:       break;
                   1989: #endif
                   1990: 
                   1991:       /*-----------------------------------------------------------------*/
                   1992:       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
                   1993:       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
                   1994:       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
                   1995:       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
                   1996:       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1997:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1998:       count = current_state->count;  /* Number already matched */
                   1999:       if (clen > 0)
                   2000:         {
                   2001:         int ncount = 0;
                   2002:         switch (c)
                   2003:           {
1.1.1.4   misho    2004:           case CHAR_VT:
                   2005:           case CHAR_FF:
                   2006:           case CHAR_NEL:
                   2007: #ifndef EBCDIC
1.1       misho    2008:           case 0x2028:
                   2009:           case 0x2029:
1.1.1.4   misho    2010: #endif  /* Not EBCDIC */
1.1       misho    2011:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   2012:           goto ANYNL03;
                   2013: 
1.1.1.4   misho    2014:           case CHAR_CR:
                   2015:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1.1       misho    2016:           /* Fall through */
                   2017: 
                   2018:           ANYNL03:
1.1.1.4   misho    2019:           case CHAR_LF:
1.1       misho    2020:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
                   2021:             {
                   2022:             active_count--;           /* Remove non-match possibility */
                   2023:             next_active_state--;
                   2024:             }
1.1.1.4   misho    2025:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2026:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1.1       misho    2027:           else
                   2028:             { ADD_NEW_DATA(-state_offset, count, ncount); }
                   2029:           break;
                   2030: 
                   2031:           default:
                   2032:           break;
                   2033:           }
                   2034:         }
                   2035:       break;
                   2036: 
                   2037:       /*-----------------------------------------------------------------*/
                   2038:       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
                   2039:       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
                   2040:       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
                   2041:       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
                   2042:       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    2043:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    2044:       count = current_state->count;  /* Number already matched */
                   2045:       if (clen > 0)
                   2046:         {
                   2047:         BOOL OK;
                   2048:         switch (c)
                   2049:           {
1.1.1.4   misho    2050:           VSPACE_CASES:
1.1       misho    2051:           OK = TRUE;
                   2052:           break;
                   2053: 
                   2054:           default:
                   2055:           OK = FALSE;
                   2056:           }
                   2057: 
                   2058:         if (OK == (d == OP_VSPACE))
                   2059:           {
                   2060:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
                   2061:             {
                   2062:             active_count--;           /* Remove non-match possibility */
                   2063:             next_active_state--;
                   2064:             }
1.1.1.4   misho    2065:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2066:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1.1       misho    2067:           else
                   2068:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   2069:           }
                   2070:         }
                   2071:       break;
                   2072: 
                   2073:       /*-----------------------------------------------------------------*/
                   2074:       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
                   2075:       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
                   2076:       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
                   2077:       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
                   2078:       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    2079:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    2080:       count = current_state->count;  /* Number already matched */
                   2081:       if (clen > 0)
                   2082:         {
                   2083:         BOOL OK;
                   2084:         switch (c)
                   2085:           {
1.1.1.4   misho    2086:           HSPACE_CASES:
1.1       misho    2087:           OK = TRUE;
                   2088:           break;
                   2089: 
                   2090:           default:
                   2091:           OK = FALSE;
                   2092:           break;
                   2093:           }
                   2094: 
                   2095:         if (OK == (d == OP_HSPACE))
                   2096:           {
                   2097:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
                   2098:             {
                   2099:             active_count--;           /* Remove non-match possibility */
                   2100:             next_active_state--;
                   2101:             }
1.1.1.4   misho    2102:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2103:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1.1       misho    2104:           else
                   2105:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   2106:           }
                   2107:         }
                   2108:       break;
                   2109: 
                   2110: /* ========================================================================== */
                   2111:       /* These opcodes are followed by a character that is usually compared
                   2112:       to the current subject character; it is loaded into d. We still get
                   2113:       here even if there is no subject character, because in some cases zero
                   2114:       repetitions are permitted. */
                   2115: 
                   2116:       /*-----------------------------------------------------------------*/
                   2117:       case OP_CHAR:
                   2118:       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2119:       break;
                   2120: 
                   2121:       /*-----------------------------------------------------------------*/
                   2122:       case OP_CHARI:
                   2123:       if (clen == 0) break;
                   2124: 
1.1.1.2   misho    2125: #ifdef SUPPORT_UTF
                   2126:       if (utf)
1.1       misho    2127:         {
                   2128:         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
                   2129:           {
                   2130:           unsigned int othercase;
1.1.1.2   misho    2131:           if (c < 128)
                   2132:             othercase = fcc[c];
                   2133:           else
                   2134:             /* If we have Unicode property support, we can use it to test the
                   2135:             other case of the character. */
1.1       misho    2136: #ifdef SUPPORT_UCP
1.1.1.2   misho    2137:             othercase = UCD_OTHERCASE(c);
1.1       misho    2138: #else
1.1.1.2   misho    2139:             othercase = NOTACHAR;
1.1       misho    2140: #endif
                   2141: 
                   2142:           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2143:           }
                   2144:         }
                   2145:       else
1.1.1.2   misho    2146: #endif  /* SUPPORT_UTF */
                   2147:       /* Not UTF mode */
1.1       misho    2148:         {
1.1.1.2   misho    2149:         if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
                   2150:           { ADD_NEW(state_offset + 2, 0); }
1.1       misho    2151:         }
                   2152:       break;
                   2153: 
                   2154: 
                   2155: #ifdef SUPPORT_UCP
                   2156:       /*-----------------------------------------------------------------*/
                   2157:       /* This is a tricky one because it can match more than one character.
                   2158:       Find out how many characters to skip, and then set up a negative state
                   2159:       to wait for them to pass before continuing. */
                   2160: 
                   2161:       case OP_EXTUNI:
1.1.1.4   misho    2162:       if (clen > 0)
1.1       misho    2163:         {
1.1.1.4   misho    2164:         int lgb, rgb;
1.1.1.2   misho    2165:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    2166:         int ncount = 0;
1.1.1.4   misho    2167:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    2168:         while (nptr < end_subject)
                   2169:           {
1.1.1.4   misho    2170:           dlen = 1;
                   2171:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
                   2172:           rgb = UCD_GRAPHBREAK(d);
                   2173:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    2174:           ncount++;
1.1.1.4   misho    2175:           lgb = rgb;
                   2176:           nptr += dlen;
1.1       misho    2177:           }
1.1.1.3   misho    2178:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                   2179:             reset_could_continue = TRUE;
1.1       misho    2180:         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
                   2181:         }
                   2182:       break;
                   2183: #endif
                   2184: 
                   2185:       /*-----------------------------------------------------------------*/
                   2186:       /* This is tricky like EXTUNI because it too can match more than one
                   2187:       character (when CR is followed by LF). In this case, set up a negative
                   2188:       state to wait for one character to pass before continuing. */
                   2189: 
                   2190:       case OP_ANYNL:
                   2191:       if (clen > 0) switch(c)
                   2192:         {
1.1.1.4   misho    2193:         case CHAR_VT:
                   2194:         case CHAR_FF:
                   2195:         case CHAR_NEL:
                   2196: #ifndef EBCDIC
1.1       misho    2197:         case 0x2028:
                   2198:         case 0x2029:
1.1.1.4   misho    2199: #endif  /* Not EBCDIC */
1.1       misho    2200:         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   2201: 
1.1.1.4   misho    2202:         case CHAR_LF:
1.1       misho    2203:         ADD_NEW(state_offset + 1, 0);
                   2204:         break;
                   2205: 
1.1.1.4   misho    2206:         case CHAR_CR:
1.1.1.3   misho    2207:         if (ptr + 1 >= end_subject)
                   2208:           {
                   2209:           ADD_NEW(state_offset + 1, 0);
                   2210:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                   2211:             reset_could_continue = TRUE;
                   2212:           }
1.1.1.4   misho    2213:         else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
1.1       misho    2214:           {
                   2215:           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                   2216:           }
                   2217:         else
                   2218:           {
                   2219:           ADD_NEW(state_offset + 1, 0);
                   2220:           }
                   2221:         break;
                   2222:         }
                   2223:       break;
                   2224: 
                   2225:       /*-----------------------------------------------------------------*/
                   2226:       case OP_NOT_VSPACE:
                   2227:       if (clen > 0) switch(c)
                   2228:         {
1.1.1.4   misho    2229:         VSPACE_CASES:
1.1       misho    2230:         break;
                   2231: 
                   2232:         default:
                   2233:         ADD_NEW(state_offset + 1, 0);
                   2234:         break;
                   2235:         }
                   2236:       break;
                   2237: 
                   2238:       /*-----------------------------------------------------------------*/
                   2239:       case OP_VSPACE:
                   2240:       if (clen > 0) switch(c)
                   2241:         {
1.1.1.4   misho    2242:         VSPACE_CASES:
1.1       misho    2243:         ADD_NEW(state_offset + 1, 0);
                   2244:         break;
                   2245: 
1.1.1.4   misho    2246:         default:
                   2247:         break;
1.1       misho    2248:         }
                   2249:       break;
                   2250: 
                   2251:       /*-----------------------------------------------------------------*/
                   2252:       case OP_NOT_HSPACE:
                   2253:       if (clen > 0) switch(c)
                   2254:         {
1.1.1.4   misho    2255:         HSPACE_CASES:
1.1       misho    2256:         break;
                   2257: 
                   2258:         default:
                   2259:         ADD_NEW(state_offset + 1, 0);
                   2260:         break;
                   2261:         }
                   2262:       break;
                   2263: 
                   2264:       /*-----------------------------------------------------------------*/
                   2265:       case OP_HSPACE:
                   2266:       if (clen > 0) switch(c)
                   2267:         {
1.1.1.4   misho    2268:         HSPACE_CASES:
1.1       misho    2269:         ADD_NEW(state_offset + 1, 0);
                   2270:         break;
1.1.1.4   misho    2271: 
                   2272:         default:
                   2273:         break;
1.1       misho    2274:         }
                   2275:       break;
                   2276: 
                   2277:       /*-----------------------------------------------------------------*/
1.1.1.3   misho    2278:       /* Match a negated single character casefully. */
1.1       misho    2279: 
                   2280:       case OP_NOT:
                   2281:       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2282:       break;
                   2283: 
                   2284:       /*-----------------------------------------------------------------*/
1.1.1.3   misho    2285:       /* Match a negated single character caselessly. */
1.1       misho    2286: 
                   2287:       case OP_NOTI:
1.1.1.3   misho    2288:       if (clen > 0)
                   2289:         {
                   2290:         unsigned int otherd;
                   2291: #ifdef SUPPORT_UTF
                   2292:         if (utf && d >= 128)
                   2293:           {
                   2294: #ifdef SUPPORT_UCP
                   2295:           otherd = UCD_OTHERCASE(d);
                   2296: #endif  /* SUPPORT_UCP */
                   2297:           }
                   2298:         else
                   2299: #endif  /* SUPPORT_UTF */
                   2300:         otherd = TABLE_GET(d, fcc, d);
                   2301:         if (c != d && c != otherd)
                   2302:           { ADD_NEW(state_offset + dlen + 1, 0); }
                   2303:         }
1.1       misho    2304:       break;
                   2305: 
                   2306:       /*-----------------------------------------------------------------*/
                   2307:       case OP_PLUSI:
                   2308:       case OP_MINPLUSI:
                   2309:       case OP_POSPLUSI:
                   2310:       case OP_NOTPLUSI:
                   2311:       case OP_NOTMINPLUSI:
                   2312:       case OP_NOTPOSPLUSI:
                   2313:       caseless = TRUE;
                   2314:       codevalue -= OP_STARI - OP_STAR;
                   2315: 
                   2316:       /* Fall through */
                   2317:       case OP_PLUS:
                   2318:       case OP_MINPLUS:
                   2319:       case OP_POSPLUS:
                   2320:       case OP_NOTPLUS:
                   2321:       case OP_NOTMINPLUS:
                   2322:       case OP_NOTPOSPLUS:
                   2323:       count = current_state->count;  /* Already matched */
                   2324:       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
                   2325:       if (clen > 0)
                   2326:         {
1.1.1.4   misho    2327:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2328:         if (caseless)
                   2329:           {
1.1.1.2   misho    2330: #ifdef SUPPORT_UTF
                   2331:           if (utf && d >= 128)
1.1       misho    2332:             {
                   2333: #ifdef SUPPORT_UCP
                   2334:             otherd = UCD_OTHERCASE(d);
                   2335: #endif  /* SUPPORT_UCP */
                   2336:             }
                   2337:           else
1.1.1.2   misho    2338: #endif  /* SUPPORT_UTF */
                   2339:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2340:           }
                   2341:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2342:           {
                   2343:           if (count > 0 &&
                   2344:               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
                   2345:             {
                   2346:             active_count--;             /* Remove non-match possibility */
                   2347:             next_active_state--;
                   2348:             }
                   2349:           count++;
                   2350:           ADD_NEW(state_offset, count);
                   2351:           }
                   2352:         }
                   2353:       break;
                   2354: 
                   2355:       /*-----------------------------------------------------------------*/
                   2356:       case OP_QUERYI:
                   2357:       case OP_MINQUERYI:
                   2358:       case OP_POSQUERYI:
                   2359:       case OP_NOTQUERYI:
                   2360:       case OP_NOTMINQUERYI:
                   2361:       case OP_NOTPOSQUERYI:
                   2362:       caseless = TRUE;
                   2363:       codevalue -= OP_STARI - OP_STAR;
                   2364:       /* Fall through */
                   2365:       case OP_QUERY:
                   2366:       case OP_MINQUERY:
                   2367:       case OP_POSQUERY:
                   2368:       case OP_NOTQUERY:
                   2369:       case OP_NOTMINQUERY:
                   2370:       case OP_NOTPOSQUERY:
                   2371:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2372:       if (clen > 0)
                   2373:         {
1.1.1.4   misho    2374:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2375:         if (caseless)
                   2376:           {
1.1.1.2   misho    2377: #ifdef SUPPORT_UTF
                   2378:           if (utf && d >= 128)
1.1       misho    2379:             {
                   2380: #ifdef SUPPORT_UCP
                   2381:             otherd = UCD_OTHERCASE(d);
                   2382: #endif  /* SUPPORT_UCP */
                   2383:             }
                   2384:           else
1.1.1.2   misho    2385: #endif  /* SUPPORT_UTF */
                   2386:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2387:           }
                   2388:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2389:           {
                   2390:           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
                   2391:             {
                   2392:             active_count--;            /* Remove non-match possibility */
                   2393:             next_active_state--;
                   2394:             }
                   2395:           ADD_NEW(state_offset + dlen + 1, 0);
                   2396:           }
                   2397:         }
                   2398:       break;
                   2399: 
                   2400:       /*-----------------------------------------------------------------*/
                   2401:       case OP_STARI:
                   2402:       case OP_MINSTARI:
                   2403:       case OP_POSSTARI:
                   2404:       case OP_NOTSTARI:
                   2405:       case OP_NOTMINSTARI:
                   2406:       case OP_NOTPOSSTARI:
                   2407:       caseless = TRUE;
                   2408:       codevalue -= OP_STARI - OP_STAR;
                   2409:       /* Fall through */
                   2410:       case OP_STAR:
                   2411:       case OP_MINSTAR:
                   2412:       case OP_POSSTAR:
                   2413:       case OP_NOTSTAR:
                   2414:       case OP_NOTMINSTAR:
                   2415:       case OP_NOTPOSSTAR:
                   2416:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2417:       if (clen > 0)
                   2418:         {
1.1.1.4   misho    2419:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2420:         if (caseless)
                   2421:           {
1.1.1.2   misho    2422: #ifdef SUPPORT_UTF
                   2423:           if (utf && d >= 128)
1.1       misho    2424:             {
                   2425: #ifdef SUPPORT_UCP
                   2426:             otherd = UCD_OTHERCASE(d);
                   2427: #endif  /* SUPPORT_UCP */
                   2428:             }
                   2429:           else
1.1.1.2   misho    2430: #endif  /* SUPPORT_UTF */
                   2431:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2432:           }
                   2433:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2434:           {
                   2435:           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
                   2436:             {
                   2437:             active_count--;            /* Remove non-match possibility */
                   2438:             next_active_state--;
                   2439:             }
                   2440:           ADD_NEW(state_offset, 0);
                   2441:           }
                   2442:         }
                   2443:       break;
                   2444: 
                   2445:       /*-----------------------------------------------------------------*/
                   2446:       case OP_EXACTI:
                   2447:       case OP_NOTEXACTI:
                   2448:       caseless = TRUE;
                   2449:       codevalue -= OP_STARI - OP_STAR;
                   2450:       /* Fall through */
                   2451:       case OP_EXACT:
                   2452:       case OP_NOTEXACT:
                   2453:       count = current_state->count;  /* Number already matched */
                   2454:       if (clen > 0)
                   2455:         {
1.1.1.4   misho    2456:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2457:         if (caseless)
                   2458:           {
1.1.1.2   misho    2459: #ifdef SUPPORT_UTF
                   2460:           if (utf && d >= 128)
1.1       misho    2461:             {
                   2462: #ifdef SUPPORT_UCP
                   2463:             otherd = UCD_OTHERCASE(d);
                   2464: #endif  /* SUPPORT_UCP */
                   2465:             }
                   2466:           else
1.1.1.2   misho    2467: #endif  /* SUPPORT_UTF */
                   2468:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2469:           }
                   2470:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2471:           {
1.1.1.4   misho    2472:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2473:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
1.1       misho    2474:           else
                   2475:             { ADD_NEW(state_offset, count); }
                   2476:           }
                   2477:         }
                   2478:       break;
                   2479: 
                   2480:       /*-----------------------------------------------------------------*/
                   2481:       case OP_UPTOI:
                   2482:       case OP_MINUPTOI:
                   2483:       case OP_POSUPTOI:
                   2484:       case OP_NOTUPTOI:
                   2485:       case OP_NOTMINUPTOI:
                   2486:       case OP_NOTPOSUPTOI:
                   2487:       caseless = TRUE;
                   2488:       codevalue -= OP_STARI - OP_STAR;
                   2489:       /* Fall through */
                   2490:       case OP_UPTO:
                   2491:       case OP_MINUPTO:
                   2492:       case OP_POSUPTO:
                   2493:       case OP_NOTUPTO:
                   2494:       case OP_NOTMINUPTO:
                   2495:       case OP_NOTPOSUPTO:
1.1.1.2   misho    2496:       ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
1.1       misho    2497:       count = current_state->count;  /* Number already matched */
                   2498:       if (clen > 0)
                   2499:         {
1.1.1.4   misho    2500:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2501:         if (caseless)
                   2502:           {
1.1.1.2   misho    2503: #ifdef SUPPORT_UTF
                   2504:           if (utf && d >= 128)
1.1       misho    2505:             {
                   2506: #ifdef SUPPORT_UCP
                   2507:             otherd = UCD_OTHERCASE(d);
                   2508: #endif  /* SUPPORT_UCP */
                   2509:             }
                   2510:           else
1.1.1.2   misho    2511: #endif  /* SUPPORT_UTF */
                   2512:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2513:           }
                   2514:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2515:           {
                   2516:           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
                   2517:             {
                   2518:             active_count--;             /* Remove non-match possibility */
                   2519:             next_active_state--;
                   2520:             }
1.1.1.4   misho    2521:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2522:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
1.1       misho    2523:           else
                   2524:             { ADD_NEW(state_offset, count); }
                   2525:           }
                   2526:         }
                   2527:       break;
                   2528: 
                   2529: 
                   2530: /* ========================================================================== */
                   2531:       /* These are the class-handling opcodes */
                   2532: 
                   2533:       case OP_CLASS:
                   2534:       case OP_NCLASS:
                   2535:       case OP_XCLASS:
                   2536:         {
                   2537:         BOOL isinclass = FALSE;
                   2538:         int next_state_offset;
1.1.1.2   misho    2539:         const pcre_uchar *ecode;
1.1       misho    2540: 
                   2541:         /* For a simple class, there is always just a 32-byte table, and we
                   2542:         can set isinclass from it. */
                   2543: 
                   2544:         if (codevalue != OP_XCLASS)
                   2545:           {
1.1.1.2   misho    2546:           ecode = code + 1 + (32 / sizeof(pcre_uchar));
1.1       misho    2547:           if (clen > 0)
                   2548:             {
                   2549:             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
1.1.1.2   misho    2550:               ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
1.1       misho    2551:             }
                   2552:           }
                   2553: 
                   2554:         /* An extended class may have a table or a list of single characters,
                   2555:         ranges, or both, and it may be positive or negative. There's a
                   2556:         function that sorts all this out. */
                   2557: 
                   2558:         else
                   2559:          {
                   2560:          ecode = code + GET(code, 1);
1.1.1.2   misho    2561:          if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
1.1       misho    2562:          }
                   2563: 
                   2564:         /* At this point, isinclass is set for all kinds of class, and ecode
                   2565:         points to the byte after the end of the class. If there is a
                   2566:         quantifier, this is where it will be. */
                   2567: 
                   2568:         next_state_offset = (int)(ecode - start_code);
                   2569: 
                   2570:         switch (*ecode)
                   2571:           {
                   2572:           case OP_CRSTAR:
                   2573:           case OP_CRMINSTAR:
1.1.1.5 ! misho    2574:           case OP_CRPOSSTAR:
1.1       misho    2575:           ADD_ACTIVE(next_state_offset + 1, 0);
1.1.1.5 ! misho    2576:           if (isinclass)
        !          2577:             {
        !          2578:             if (*ecode == OP_CRPOSSTAR)
        !          2579:               {
        !          2580:               active_count--;           /* Remove non-match possibility */
        !          2581:               next_active_state--;
        !          2582:               }
        !          2583:             ADD_NEW(state_offset, 0);
        !          2584:             }
1.1       misho    2585:           break;
                   2586: 
                   2587:           case OP_CRPLUS:
                   2588:           case OP_CRMINPLUS:
1.1.1.5 ! misho    2589:           case OP_CRPOSPLUS:
1.1       misho    2590:           count = current_state->count;  /* Already matched */
                   2591:           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
1.1.1.5 ! misho    2592:           if (isinclass)
        !          2593:             {
        !          2594:             if (count > 0 && *ecode == OP_CRPOSPLUS)
        !          2595:               {
        !          2596:               active_count--;           /* Remove non-match possibility */
        !          2597:               next_active_state--;
        !          2598:               }
        !          2599:             count++;
        !          2600:             ADD_NEW(state_offset, count);
        !          2601:             }
1.1       misho    2602:           break;
                   2603: 
                   2604:           case OP_CRQUERY:
                   2605:           case OP_CRMINQUERY:
1.1.1.5 ! misho    2606:           case OP_CRPOSQUERY:
1.1       misho    2607:           ADD_ACTIVE(next_state_offset + 1, 0);
1.1.1.5 ! misho    2608:           if (isinclass)
        !          2609:             {
        !          2610:             if (*ecode == OP_CRPOSQUERY)
        !          2611:               {
        !          2612:               active_count--;           /* Remove non-match possibility */
        !          2613:               next_active_state--;
        !          2614:               }
        !          2615:             ADD_NEW(next_state_offset + 1, 0);
        !          2616:             }
1.1       misho    2617:           break;
                   2618: 
                   2619:           case OP_CRRANGE:
                   2620:           case OP_CRMINRANGE:
1.1.1.5 ! misho    2621:           case OP_CRPOSRANGE:
1.1       misho    2622:           count = current_state->count;  /* Already matched */
1.1.1.4   misho    2623:           if (count >= (int)GET2(ecode, 1))
1.1.1.2   misho    2624:             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
1.1       misho    2625:           if (isinclass)
                   2626:             {
1.1.1.4   misho    2627:             int max = (int)GET2(ecode, 1 + IMM2_SIZE);
1.1.1.5 ! misho    2628:             if (*ecode == OP_CRPOSRANGE)
        !          2629:               {
        !          2630:               active_count--;           /* Remove non-match possibility */
        !          2631:               next_active_state--;
        !          2632:               }
1.1       misho    2633:             if (++count >= max && max != 0)   /* Max 0 => no limit */
1.1.1.2   misho    2634:               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
1.1       misho    2635:             else
                   2636:               { ADD_NEW(state_offset, count); }
                   2637:             }
                   2638:           break;
                   2639: 
                   2640:           default:
                   2641:           if (isinclass) { ADD_NEW(next_state_offset, 0); }
                   2642:           break;
                   2643:           }
                   2644:         }
                   2645:       break;
                   2646: 
                   2647: /* ========================================================================== */
                   2648:       /* These are the opcodes for fancy brackets of various kinds. We have
                   2649:       to use recursion in order to handle them. The "always failing" assertion
                   2650:       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
                   2651:       though the other "backtracking verbs" are not supported. */
                   2652: 
                   2653:       case OP_FAIL:
                   2654:       forced_fail++;    /* Count FAILs for multiple states */
                   2655:       break;
                   2656: 
                   2657:       case OP_ASSERT:
                   2658:       case OP_ASSERT_NOT:
                   2659:       case OP_ASSERTBACK:
                   2660:       case OP_ASSERTBACK_NOT:
                   2661:         {
                   2662:         int rc;
                   2663:         int local_offsets[2];
                   2664:         int local_workspace[1000];
1.1.1.2   misho    2665:         const pcre_uchar *endasscode = code + GET(code, 1);
1.1       misho    2666: 
                   2667:         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2668: 
                   2669:         rc = internal_dfa_exec(
                   2670:           md,                                   /* static match data */
                   2671:           code,                                 /* this subexpression's code */
                   2672:           ptr,                                  /* where we currently are */
                   2673:           (int)(ptr - start_subject),           /* start offset */
                   2674:           local_offsets,                        /* offset vector */
                   2675:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2676:           local_workspace,                      /* workspace vector */
                   2677:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2678:           rlevel);                              /* function recursion level */
                   2679: 
                   2680:         if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2681:         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
                   2682:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2683:         }
                   2684:       break;
                   2685: 
                   2686:       /*-----------------------------------------------------------------*/
                   2687:       case OP_COND:
                   2688:       case OP_SCOND:
                   2689:         {
                   2690:         int local_offsets[1000];
                   2691:         int local_workspace[1000];
                   2692:         int codelink = GET(code, 1);
                   2693:         int condcode;
                   2694: 
                   2695:         /* Because of the way auto-callout works during compile, a callout item
                   2696:         is inserted between OP_COND and an assertion condition. This does not
                   2697:         happen for the other conditions. */
                   2698: 
                   2699:         if (code[LINK_SIZE+1] == OP_CALLOUT)
                   2700:           {
                   2701:           rrc = 0;
1.1.1.2   misho    2702:           if (PUBL(callout) != NULL)
1.1       misho    2703:             {
1.1.1.2   misho    2704:             PUBL(callout_block) cb;
1.1       misho    2705:             cb.version          = 1;   /* Version 1 of the callout block */
                   2706:             cb.callout_number   = code[LINK_SIZE+2];
                   2707:             cb.offset_vector    = offsets;
1.1.1.4   misho    2708: #if defined COMPILE_PCRE8
1.1       misho    2709:             cb.subject          = (PCRE_SPTR)start_subject;
1.1.1.4   misho    2710: #elif defined COMPILE_PCRE16
1.1.1.2   misho    2711:             cb.subject          = (PCRE_SPTR16)start_subject;
1.1.1.4   misho    2712: #elif defined COMPILE_PCRE32
                   2713:             cb.subject          = (PCRE_SPTR32)start_subject;
1.1.1.2   misho    2714: #endif
1.1       misho    2715:             cb.subject_length   = (int)(end_subject - start_subject);
                   2716:             cb.start_match      = (int)(current_subject - start_subject);
                   2717:             cb.current_position = (int)(ptr - start_subject);
                   2718:             cb.pattern_position = GET(code, LINK_SIZE + 3);
                   2719:             cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
                   2720:             cb.capture_top      = 1;
                   2721:             cb.capture_last     = -1;
                   2722:             cb.callout_data     = md->callout_data;
                   2723:             cb.mark             = NULL;   /* No (*MARK) support */
1.1.1.2   misho    2724:             if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
1.1       misho    2725:             }
                   2726:           if (rrc > 0) break;                      /* Fail this thread */
1.1.1.2   misho    2727:           code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
1.1       misho    2728:           }
                   2729: 
                   2730:         condcode = code[LINK_SIZE+1];
                   2731: 
1.1.1.5 ! misho    2732:         /* Back reference conditions and duplicate named recursion conditions
        !          2733:         are not supported */
1.1       misho    2734: 
1.1.1.5 ! misho    2735:         if (condcode == OP_CREF || condcode == OP_DNCREF ||
        !          2736:             condcode == OP_DNRREF)
1.1       misho    2737:           return PCRE_ERROR_DFA_UCOND;
                   2738: 
                   2739:         /* The DEFINE condition is always false */
                   2740: 
                   2741:         if (condcode == OP_DEF)
                   2742:           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2743: 
                   2744:         /* The only supported version of OP_RREF is for the value RREF_ANY,
                   2745:         which means "test if in any recursion". We can't test for specifically
                   2746:         recursed groups. */
                   2747: 
1.1.1.5 ! misho    2748:         else if (condcode == OP_RREF)
1.1       misho    2749:           {
1.1.1.2   misho    2750:           int value = GET2(code, LINK_SIZE + 2);
1.1       misho    2751:           if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
                   2752:           if (md->recursive != NULL)
1.1.1.2   misho    2753:             { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
1.1       misho    2754:           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2755:           }
                   2756: 
                   2757:         /* Otherwise, the condition is an assertion */
                   2758: 
                   2759:         else
                   2760:           {
                   2761:           int rc;
1.1.1.2   misho    2762:           const pcre_uchar *asscode = code + LINK_SIZE + 1;
                   2763:           const pcre_uchar *endasscode = asscode + GET(asscode, 1);
1.1       misho    2764: 
                   2765:           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2766: 
                   2767:           rc = internal_dfa_exec(
                   2768:             md,                                   /* fixed match data */
                   2769:             asscode,                              /* this subexpression's code */
                   2770:             ptr,                                  /* where we currently are */
                   2771:             (int)(ptr - start_subject),           /* start offset */
                   2772:             local_offsets,                        /* offset vector */
                   2773:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2774:             local_workspace,                      /* workspace vector */
                   2775:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2776:             rlevel);                              /* function recursion level */
                   2777: 
                   2778:           if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2779:           if ((rc >= 0) ==
                   2780:                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
                   2781:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2782:           else
                   2783:             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2784:           }
                   2785:         }
                   2786:       break;
                   2787: 
                   2788:       /*-----------------------------------------------------------------*/
                   2789:       case OP_RECURSE:
                   2790:         {
                   2791:         dfa_recursion_info *ri;
                   2792:         int local_offsets[1000];
                   2793:         int local_workspace[1000];
1.1.1.2   misho    2794:         const pcre_uchar *callpat = start_code + GET(code, 1);
1.1       misho    2795:         int recno = (callpat == md->start_code)? 0 :
                   2796:           GET2(callpat, 1 + LINK_SIZE);
                   2797:         int rc;
                   2798: 
                   2799:         DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
                   2800: 
                   2801:         /* Check for repeating a recursion without advancing the subject
                   2802:         pointer. This should catch convoluted mutual recursions. (Some simple
                   2803:         cases are caught at compile time.) */
                   2804: 
                   2805:         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   2806:           if (recno == ri->group_num && ptr == ri->subject_position)
                   2807:             return PCRE_ERROR_RECURSELOOP;
                   2808: 
                   2809:         /* Remember this recursion and where we started it so as to
                   2810:         catch infinite loops. */
                   2811: 
                   2812:         new_recursive.group_num = recno;
                   2813:         new_recursive.subject_position = ptr;
                   2814:         new_recursive.prevrec = md->recursive;
                   2815:         md->recursive = &new_recursive;
                   2816: 
                   2817:         rc = internal_dfa_exec(
                   2818:           md,                                   /* fixed match data */
                   2819:           callpat,                              /* this subexpression's code */
                   2820:           ptr,                                  /* where we currently are */
                   2821:           (int)(ptr - start_subject),           /* start offset */
                   2822:           local_offsets,                        /* offset vector */
                   2823:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2824:           local_workspace,                      /* workspace vector */
                   2825:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2826:           rlevel);                              /* function recursion level */
                   2827: 
                   2828:         md->recursive = new_recursive.prevrec;  /* Done this recursion */
                   2829: 
                   2830:         DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
                   2831:           rc));
                   2832: 
                   2833:         /* Ran out of internal offsets */
                   2834: 
                   2835:         if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
                   2836: 
                   2837:         /* For each successful matched substring, set up the next state with a
                   2838:         count of characters to skip before trying it. Note that the count is in
                   2839:         characters, not bytes. */
                   2840: 
                   2841:         if (rc > 0)
                   2842:           {
                   2843:           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
                   2844:             {
                   2845:             int charcount = local_offsets[rc+1] - local_offsets[rc];
1.1.1.4   misho    2846: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.3   misho    2847:             if (utf)
                   2848:               {
                   2849:               const pcre_uchar *p = start_subject + local_offsets[rc];
                   2850:               const pcre_uchar *pp = start_subject + local_offsets[rc+1];
                   2851:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
                   2852:               }
1.1.1.2   misho    2853: #endif
1.1       misho    2854:             if (charcount > 0)
                   2855:               {
                   2856:               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
                   2857:               }
                   2858:             else
                   2859:               {
                   2860:               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
                   2861:               }
                   2862:             }
                   2863:           }
                   2864:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2865:         }
                   2866:       break;
                   2867: 
                   2868:       /*-----------------------------------------------------------------*/
                   2869:       case OP_BRAPOS:
                   2870:       case OP_SBRAPOS:
                   2871:       case OP_CBRAPOS:
                   2872:       case OP_SCBRAPOS:
                   2873:       case OP_BRAPOSZERO:
                   2874:         {
                   2875:         int charcount, matched_count;
1.1.1.2   misho    2876:         const pcre_uchar *local_ptr = ptr;
1.1       misho    2877:         BOOL allow_zero;
                   2878: 
                   2879:         if (codevalue == OP_BRAPOSZERO)
                   2880:           {
                   2881:           allow_zero = TRUE;
                   2882:           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
                   2883:           }
                   2884:         else allow_zero = FALSE;
                   2885: 
                   2886:         /* Loop to match the subpattern as many times as possible as if it were
                   2887:         a complete pattern. */
                   2888: 
                   2889:         for (matched_count = 0;; matched_count++)
                   2890:           {
                   2891:           int local_offsets[2];
                   2892:           int local_workspace[1000];
                   2893: 
                   2894:           int rc = internal_dfa_exec(
                   2895:             md,                                   /* fixed match data */
                   2896:             code,                                 /* this subexpression's code */
                   2897:             local_ptr,                            /* where we currently are */
                   2898:             (int)(ptr - start_subject),           /* start offset */
                   2899:             local_offsets,                        /* offset vector */
                   2900:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2901:             local_workspace,                      /* workspace vector */
                   2902:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2903:             rlevel);                              /* function recursion level */
                   2904: 
                   2905:           /* Failed to match */
                   2906: 
                   2907:           if (rc < 0)
                   2908:             {
                   2909:             if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2910:             break;
                   2911:             }
                   2912: 
                   2913:           /* Matched: break the loop if zero characters matched. */
                   2914: 
                   2915:           charcount = local_offsets[1] - local_offsets[0];
                   2916:           if (charcount == 0) break;
                   2917:           local_ptr += charcount;    /* Advance temporary position ptr */
                   2918:           }
                   2919: 
                   2920:         /* At this point we have matched the subpattern matched_count
                   2921:         times, and local_ptr is pointing to the character after the end of the
                   2922:         last match. */
                   2923: 
                   2924:         if (matched_count > 0 || allow_zero)
                   2925:           {
1.1.1.2   misho    2926:           const pcre_uchar *end_subpattern = code;
1.1       misho    2927:           int next_state_offset;
                   2928: 
                   2929:           do { end_subpattern += GET(end_subpattern, 1); }
                   2930:             while (*end_subpattern == OP_ALT);
                   2931:           next_state_offset =
                   2932:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2933: 
                   2934:           /* Optimization: if there are no more active states, and there
                   2935:           are no new states yet set up, then skip over the subject string
                   2936:           right here, to save looping. Otherwise, set up the new state to swing
                   2937:           into action when the end of the matched substring is reached. */
                   2938: 
                   2939:           if (i + 1 >= active_count && new_count == 0)
                   2940:             {
                   2941:             ptr = local_ptr;
                   2942:             clen = 0;
                   2943:             ADD_NEW(next_state_offset, 0);
                   2944:             }
                   2945:           else
                   2946:             {
1.1.1.2   misho    2947:             const pcre_uchar *p = ptr;
                   2948:             const pcre_uchar *pp = local_ptr;
1.1       misho    2949:             charcount = (int)(pp - p);
1.1.1.4   misho    2950: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.3   misho    2951:             if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
1.1.1.2   misho    2952: #endif
1.1       misho    2953:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2954:             }
                   2955:           }
                   2956:         }
                   2957:       break;
                   2958: 
                   2959:       /*-----------------------------------------------------------------*/
                   2960:       case OP_ONCE:
                   2961:       case OP_ONCE_NC:
                   2962:         {
                   2963:         int local_offsets[2];
                   2964:         int local_workspace[1000];
                   2965: 
                   2966:         int rc = internal_dfa_exec(
                   2967:           md,                                   /* fixed match data */
                   2968:           code,                                 /* this subexpression's code */
                   2969:           ptr,                                  /* where we currently are */
                   2970:           (int)(ptr - start_subject),           /* start offset */
                   2971:           local_offsets,                        /* offset vector */
                   2972:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2973:           local_workspace,                      /* workspace vector */
                   2974:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2975:           rlevel);                              /* function recursion level */
                   2976: 
                   2977:         if (rc >= 0)
                   2978:           {
1.1.1.2   misho    2979:           const pcre_uchar *end_subpattern = code;
1.1       misho    2980:           int charcount = local_offsets[1] - local_offsets[0];
                   2981:           int next_state_offset, repeat_state_offset;
                   2982: 
                   2983:           do { end_subpattern += GET(end_subpattern, 1); }
                   2984:             while (*end_subpattern == OP_ALT);
                   2985:           next_state_offset =
                   2986:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2987: 
                   2988:           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
                   2989:           arrange for the repeat state also to be added to the relevant list.
                   2990:           Calculate the offset, or set -1 for no repeat. */
                   2991: 
                   2992:           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
                   2993:                                  *end_subpattern == OP_KETRMIN)?
                   2994:             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
                   2995: 
                   2996:           /* If we have matched an empty string, add the next state at the
                   2997:           current character pointer. This is important so that the duplicate
                   2998:           checking kicks in, which is what breaks infinite loops that match an
                   2999:           empty string. */
                   3000: 
                   3001:           if (charcount == 0)
                   3002:             {
                   3003:             ADD_ACTIVE(next_state_offset, 0);
                   3004:             }
                   3005: 
                   3006:           /* Optimization: if there are no more active states, and there
                   3007:           are no new states yet set up, then skip over the subject string
                   3008:           right here, to save looping. Otherwise, set up the new state to swing
                   3009:           into action when the end of the matched substring is reached. */
                   3010: 
                   3011:           else if (i + 1 >= active_count && new_count == 0)
                   3012:             {
                   3013:             ptr += charcount;
                   3014:             clen = 0;
                   3015:             ADD_NEW(next_state_offset, 0);
                   3016: 
                   3017:             /* If we are adding a repeat state at the new character position,
                   3018:             we must fudge things so that it is the only current state.
                   3019:             Otherwise, it might be a duplicate of one we processed before, and
                   3020:             that would cause it to be skipped. */
                   3021: 
                   3022:             if (repeat_state_offset >= 0)
                   3023:               {
                   3024:               next_active_state = active_states;
                   3025:               active_count = 0;
                   3026:               i = -1;
                   3027:               ADD_ACTIVE(repeat_state_offset, 0);
                   3028:               }
                   3029:             }
                   3030:           else
                   3031:             {
1.1.1.4   misho    3032: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.3   misho    3033:             if (utf)
                   3034:               {
                   3035:               const pcre_uchar *p = start_subject + local_offsets[0];
                   3036:               const pcre_uchar *pp = start_subject + local_offsets[1];
                   3037:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
                   3038:               }
1.1.1.2   misho    3039: #endif
1.1       misho    3040:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   3041:             if (repeat_state_offset >= 0)
                   3042:               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
                   3043:             }
                   3044:           }
                   3045:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   3046:         }
                   3047:       break;
                   3048: 
                   3049: 
                   3050: /* ========================================================================== */
                   3051:       /* Handle callouts */
                   3052: 
                   3053:       case OP_CALLOUT:
                   3054:       rrc = 0;
1.1.1.2   misho    3055:       if (PUBL(callout) != NULL)
1.1       misho    3056:         {
1.1.1.2   misho    3057:         PUBL(callout_block) cb;
1.1       misho    3058:         cb.version          = 1;   /* Version 1 of the callout block */
                   3059:         cb.callout_number   = code[1];
                   3060:         cb.offset_vector    = offsets;
1.1.1.4   misho    3061: #if defined COMPILE_PCRE8
1.1       misho    3062:         cb.subject          = (PCRE_SPTR)start_subject;
1.1.1.4   misho    3063: #elif defined COMPILE_PCRE16
1.1.1.2   misho    3064:         cb.subject          = (PCRE_SPTR16)start_subject;
1.1.1.4   misho    3065: #elif defined COMPILE_PCRE32
                   3066:         cb.subject          = (PCRE_SPTR32)start_subject;
1.1.1.2   misho    3067: #endif
1.1       misho    3068:         cb.subject_length   = (int)(end_subject - start_subject);
                   3069:         cb.start_match      = (int)(current_subject - start_subject);
                   3070:         cb.current_position = (int)(ptr - start_subject);
                   3071:         cb.pattern_position = GET(code, 2);
                   3072:         cb.next_item_length = GET(code, 2 + LINK_SIZE);
                   3073:         cb.capture_top      = 1;
                   3074:         cb.capture_last     = -1;
                   3075:         cb.callout_data     = md->callout_data;
                   3076:         cb.mark             = NULL;   /* No (*MARK) support */
1.1.1.2   misho    3077:         if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
1.1       misho    3078:         }
                   3079:       if (rrc == 0)
1.1.1.2   misho    3080:         { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
1.1       misho    3081:       break;
                   3082: 
                   3083: 
                   3084: /* ========================================================================== */
                   3085:       default:        /* Unsupported opcode */
                   3086:       return PCRE_ERROR_DFA_UITEM;
                   3087:       }
                   3088: 
                   3089:     NEXT_ACTIVE_STATE: continue;
                   3090: 
                   3091:     }      /* End of loop scanning active states */
                   3092: 
                   3093:   /* We have finished the processing at the current subject character. If no
                   3094:   new states have been set for the next character, we have found all the
                   3095:   matches that we are going to find. If we are at the top level and partial
                   3096:   matching has been requested, check for appropriate conditions.
                   3097: 
                   3098:   The "forced_fail" variable counts the number of (*F) encountered for the
                   3099:   character. If it is equal to the original active_count (saved in
                   3100:   workspace[1]) it means that (*F) was found on every active state. In this
                   3101:   case we don't want to give a partial match.
                   3102: 
                   3103:   The "could_continue" variable is true if a state could have continued but
                   3104:   for the fact that the end of the subject was reached. */
                   3105: 
                   3106:   if (new_count <= 0)
                   3107:     {
                   3108:     if (rlevel == 1 &&                               /* Top level, and */
1.1.1.3   misho    3109:         could_continue &&                            /* Some could go on, and */
1.1       misho    3110:         forced_fail != workspace[1] &&               /* Not all forced fail & */
                   3111:         (                                            /* either... */
                   3112:         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
                   3113:         ||                                           /* or... */
                   3114:         ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
                   3115:          match_count < 0)                            /* no matches */
                   3116:         ) &&                                         /* And... */
1.1.1.3   misho    3117:         (
                   3118:         partial_newline ||                           /* Either partial NL */
                   3119:           (                                          /* or ... */
                   3120:           ptr >= end_subject &&                /* End of subject and */
                   3121:           ptr > md->start_used_ptr)            /* Inspected non-empty string */
                   3122:           )
                   3123:         )
1.1       misho    3124:       match_count = PCRE_ERROR_PARTIAL;
                   3125:     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                   3126:       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
                   3127:       rlevel*2-2, SP));
                   3128:     break;        /* In effect, "return", but see the comment below */
                   3129:     }
                   3130: 
                   3131:   /* One or more states are active for the next character. */
                   3132: 
                   3133:   ptr += clen;    /* Advance to next subject character */
                   3134:   }               /* Loop to move along the subject string */
                   3135: 
                   3136: /* Control gets here from "break" a few lines above. We do it this way because
                   3137: if we use "return" above, we have compiler trouble. Some compilers warn if
                   3138: there's nothing here because they think the function doesn't return a value. On
                   3139: the other hand, if we put a dummy statement here, some more clever compilers
                   3140: complain that it can't be reached. Sigh. */
                   3141: 
                   3142: return match_count;
                   3143: }
                   3144: 
                   3145: 
                   3146: 
                   3147: 
                   3148: /*************************************************
                   3149: *    Execute a Regular Expression - DFA engine   *
                   3150: *************************************************/
                   3151: 
                   3152: /* This external function applies a compiled re to a subject string using a DFA
                   3153: engine. This function calls the internal function multiple times if the pattern
                   3154: is not anchored.
                   3155: 
                   3156: Arguments:
                   3157:   argument_re     points to the compiled expression
                   3158:   extra_data      points to extra data or is NULL
                   3159:   subject         points to the subject string
                   3160:   length          length of subject string (may contain binary zeros)
                   3161:   start_offset    where to start in the subject string
                   3162:   options         option bits
                   3163:   offsets         vector of match offsets
                   3164:   offsetcount     size of same
                   3165:   workspace       workspace vector
                   3166:   wscount         size of same
                   3167: 
                   3168: Returns:          > 0 => number of match offset pairs placed in offsets
                   3169:                   = 0 => offsets overflowed; longest matches are present
                   3170:                    -1 => failed to match
                   3171:                  < -1 => some kind of unexpected problem
                   3172: */
                   3173: 
1.1.1.4   misho    3174: #if defined COMPILE_PCRE8
1.1       misho    3175: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3176: pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   3177:   const char *subject, int length, int start_offset, int options, int *offsets,
                   3178:   int offsetcount, int *workspace, int wscount)
1.1.1.4   misho    3179: #elif defined COMPILE_PCRE16
1.1.1.2   misho    3180: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3181: pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
                   3182:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
                   3183:   int offsetcount, int *workspace, int wscount)
1.1.1.4   misho    3184: #elif defined COMPILE_PCRE32
                   3185: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3186: pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
                   3187:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
                   3188:   int offsetcount, int *workspace, int wscount)
1.1.1.2   misho    3189: #endif
1.1       misho    3190: {
1.1.1.2   misho    3191: REAL_PCRE *re = (REAL_PCRE *)argument_re;
1.1       misho    3192: dfa_match_data match_block;
                   3193: dfa_match_data *md = &match_block;
1.1.1.2   misho    3194: BOOL utf, anchored, startline, firstline;
                   3195: const pcre_uchar *current_subject, *end_subject;
1.1       misho    3196: const pcre_study_data *study = NULL;
                   3197: 
1.1.1.2   misho    3198: const pcre_uchar *req_char_ptr;
                   3199: const pcre_uint8 *start_bits = NULL;
                   3200: BOOL has_first_char = FALSE;
                   3201: BOOL has_req_char = FALSE;
                   3202: pcre_uchar first_char = 0;
                   3203: pcre_uchar first_char2 = 0;
                   3204: pcre_uchar req_char = 0;
                   3205: pcre_uchar req_char2 = 0;
1.1       misho    3206: int newline;
                   3207: 
                   3208: /* Plausibility checks */
                   3209: 
                   3210: if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   3211: if (re == NULL || subject == NULL || workspace == NULL ||
                   3212:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   3213: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   3214: if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
1.1.1.4   misho    3215: if (length < 0) return PCRE_ERROR_BADLENGTH;
1.1       misho    3216: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   3217: 
1.1.1.3   misho    3218: /* Check that the first field in the block is the magic number. If it is not,
                   3219: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
                   3220: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
                   3221: means that the pattern is likely compiled with different endianness. */
                   3222: 
                   3223: if (re->magic_number != MAGIC_NUMBER)
                   3224:   return re->magic_number == REVERSED_MAGIC_NUMBER?
                   3225:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
                   3226: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
                   3227: 
                   3228: /* If restarting after a partial match, do some sanity checks on the contents
                   3229: of the workspace. */
                   3230: 
                   3231: if ((options & PCRE_DFA_RESTART) != 0)
                   3232:   {
                   3233:   if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
                   3234:     workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
                   3235:       return PCRE_ERROR_DFA_BADRESTART;
                   3236:   }
                   3237: 
                   3238: /* Set up study, callout, and table data */
1.1       misho    3239: 
                   3240: md->tables = re->tables;
                   3241: md->callout_data = NULL;
                   3242: 
                   3243: if (extra_data != NULL)
                   3244:   {
                   3245:   unsigned int flags = extra_data->flags;
                   3246:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   3247:     study = (const pcre_study_data *)extra_data->study_data;
                   3248:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
                   3249:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   3250:     return PCRE_ERROR_DFA_UMLIMIT;
                   3251:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   3252:     md->callout_data = extra_data->callout_data;
                   3253:   if ((flags & PCRE_EXTRA_TABLES) != 0)
                   3254:     md->tables = extra_data->tables;
                   3255:   }
                   3256: 
                   3257: /* Set some local values */
                   3258: 
1.1.1.2   misho    3259: current_subject = (const pcre_uchar *)subject + start_offset;
                   3260: end_subject = (const pcre_uchar *)subject + length;
                   3261: req_char_ptr = current_subject - 1;
                   3262: 
                   3263: #ifdef SUPPORT_UTF
1.1.1.4   misho    3264: /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
1.1.1.2   misho    3265: utf = (re->options & PCRE_UTF8) != 0;
1.1       misho    3266: #else
1.1.1.2   misho    3267: utf = FALSE;
1.1       misho    3268: #endif
                   3269: 
                   3270: anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
                   3271:   (re->options & PCRE_ANCHORED) != 0;
                   3272: 
                   3273: /* The remaining fixed data for passing around. */
                   3274: 
1.1.1.2   misho    3275: md->start_code = (const pcre_uchar *)argument_re +
1.1       misho    3276:     re->name_table_offset + re->name_count * re->name_entry_size;
1.1.1.2   misho    3277: md->start_subject = (const pcre_uchar *)subject;
1.1       misho    3278: md->end_subject = end_subject;
                   3279: md->start_offset = start_offset;
                   3280: md->moptions = options;
                   3281: md->poptions = re->options;
                   3282: 
                   3283: /* If the BSR option is not set at match time, copy what was set
                   3284: at compile time. */
                   3285: 
                   3286: if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
                   3287:   {
                   3288:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   3289:     md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
                   3290: #ifdef BSR_ANYCRLF
                   3291:   else md->moptions |= PCRE_BSR_ANYCRLF;
                   3292: #endif
                   3293:   }
                   3294: 
                   3295: /* Handle different types of newline. The three bits give eight cases. If
                   3296: nothing is set at run time, whatever was used at compile time applies. */
                   3297: 
                   3298: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
                   3299:          PCRE_NEWLINE_BITS)
                   3300:   {
                   3301:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   3302:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   3303:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   3304:   case PCRE_NEWLINE_CR+
                   3305:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   3306:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   3307:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   3308:   default: return PCRE_ERROR_BADNEWLINE;
                   3309:   }
                   3310: 
                   3311: if (newline == -2)
                   3312:   {
                   3313:   md->nltype = NLTYPE_ANYCRLF;
                   3314:   }
                   3315: else if (newline < 0)
                   3316:   {
                   3317:   md->nltype = NLTYPE_ANY;
                   3318:   }
                   3319: else
                   3320:   {
                   3321:   md->nltype = NLTYPE_FIXED;
                   3322:   if (newline > 255)
                   3323:     {
                   3324:     md->nllen = 2;
                   3325:     md->nl[0] = (newline >> 8) & 255;
                   3326:     md->nl[1] = newline & 255;
                   3327:     }
                   3328:   else
                   3329:     {
                   3330:     md->nllen = 1;
                   3331:     md->nl[0] = newline;
                   3332:     }
                   3333:   }
                   3334: 
                   3335: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
                   3336: back the character offset. */
                   3337: 
1.1.1.2   misho    3338: #ifdef SUPPORT_UTF
                   3339: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
1.1       misho    3340:   {
                   3341:   int erroroffset;
1.1.1.2   misho    3342:   int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
1.1       misho    3343:   if (errorcode != 0)
                   3344:     {
                   3345:     if (offsetcount >= 2)
                   3346:       {
                   3347:       offsets[0] = erroroffset;
                   3348:       offsets[1] = errorcode;
                   3349:       }
1.1.1.4   misho    3350: #if defined COMPILE_PCRE8
                   3351:     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
1.1       misho    3352:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
1.1.1.4   misho    3353: #elif defined COMPILE_PCRE16
                   3354:     return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
                   3355:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
                   3356: #elif defined COMPILE_PCRE32
                   3357:     return PCRE_ERROR_BADUTF32;
                   3358: #endif
1.1       misho    3359:     }
1.1.1.4   misho    3360: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
1.1       misho    3361:   if (start_offset > 0 && start_offset < length &&
1.1.1.2   misho    3362:         NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
1.1       misho    3363:     return PCRE_ERROR_BADUTF8_OFFSET;
1.1.1.4   misho    3364: #endif
1.1       misho    3365:   }
                   3366: #endif
                   3367: 
                   3368: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   3369: is a feature that makes it possible to save compiled regex and re-use them
                   3370: in other programs later. */
                   3371: 
1.1.1.2   misho    3372: if (md->tables == NULL) md->tables = PRIV(default_tables);
1.1       misho    3373: 
1.1.1.2   misho    3374: /* The "must be at the start of a line" flags are used in a loop when finding
                   3375: where to start. */
1.1       misho    3376: 
                   3377: startline = (re->flags & PCRE_STARTLINE) != 0;
                   3378: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   3379: 
                   3380: /* Set up the first character to match, if available. The first_char value is
                   3381: never set for an anchored regular expression, but the anchoring may be forced
                   3382: at run time, so we have to test for anchoring. The first char may be unset for
                   3383: an unanchored pattern, of course. If there's no first char and the pattern was
                   3384: studied, there may be a bitmap of possible first characters. */
                   3385: 
                   3386: if (!anchored)
                   3387:   {
                   3388:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   3389:     {
1.1.1.2   misho    3390:     has_first_char = TRUE;
                   3391:     first_char = first_char2 = (pcre_uchar)(re->first_char);
                   3392:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
                   3393:       {
                   3394:       first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
                   3395: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   3396:       if (utf && first_char > 127)
                   3397:         first_char2 = UCD_OTHERCASE(first_char);
                   3398: #endif
                   3399:       }
1.1       misho    3400:     }
                   3401:   else
                   3402:     {
                   3403:     if (!startline && study != NULL &&
                   3404:          (study->flags & PCRE_STUDY_MAPPED) != 0)
                   3405:       start_bits = study->start_bits;
                   3406:     }
                   3407:   }
                   3408: 
                   3409: /* For anchored or unanchored matches, there may be a "last known required
                   3410: character" set. */
                   3411: 
                   3412: if ((re->flags & PCRE_REQCHSET) != 0)
                   3413:   {
1.1.1.2   misho    3414:   has_req_char = TRUE;
                   3415:   req_char = req_char2 = (pcre_uchar)(re->req_char);
                   3416:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
                   3417:     {
                   3418:     req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
                   3419: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   3420:     if (utf && req_char > 127)
                   3421:       req_char2 = UCD_OTHERCASE(req_char);
                   3422: #endif
                   3423:     }
1.1       misho    3424:   }
                   3425: 
                   3426: /* Call the main matching function, looping for a non-anchored regex after a
                   3427: failed match. If not restarting, perform certain optimizations at the start of
                   3428: a match. */
                   3429: 
                   3430: for (;;)
                   3431:   {
                   3432:   int rc;
                   3433: 
                   3434:   if ((options & PCRE_DFA_RESTART) == 0)
                   3435:     {
1.1.1.2   misho    3436:     const pcre_uchar *save_end_subject = end_subject;
1.1       misho    3437: 
                   3438:     /* If firstline is TRUE, the start of the match is constrained to the first
                   3439:     line of a multiline string. Implement this by temporarily adjusting
                   3440:     end_subject so that we stop scanning at a newline. If the match fails at
                   3441:     the newline, later code breaks this loop. */
                   3442: 
                   3443:     if (firstline)
                   3444:       {
1.1.1.2   misho    3445:       PCRE_PUCHAR t = current_subject;
                   3446: #ifdef SUPPORT_UTF
                   3447:       if (utf)
1.1       misho    3448:         {
                   3449:         while (t < md->end_subject && !IS_NEWLINE(t))
                   3450:           {
                   3451:           t++;
1.1.1.2   misho    3452:           ACROSSCHAR(t < end_subject, *t, t++);
1.1       misho    3453:           }
                   3454:         }
                   3455:       else
                   3456: #endif
                   3457:       while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   3458:       end_subject = t;
                   3459:       }
                   3460: 
                   3461:     /* There are some optimizations that avoid running the match if a known
                   3462:     starting point is not found. However, there is an option that disables
                   3463:     these, for testing and for ensuring that all callouts do actually occur.
                   3464:     The option can be set in the regex by (*NO_START_OPT) or passed in
                   3465:     match-time options. */
                   3466: 
                   3467:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   3468:       {
1.1.1.2   misho    3469:       /* Advance to a known first char. */
1.1       misho    3470: 
1.1.1.2   misho    3471:       if (has_first_char)
1.1       misho    3472:         {
1.1.1.2   misho    3473:         if (first_char != first_char2)
1.1.1.4   misho    3474:           {
                   3475:           pcre_uchar csc;
1.1       misho    3476:           while (current_subject < end_subject &&
1.1.1.4   misho    3477:                  (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
1.1       misho    3478:             current_subject++;
1.1.1.4   misho    3479:           }
1.1       misho    3480:         else
                   3481:           while (current_subject < end_subject &&
1.1.1.4   misho    3482:                  RAWUCHARTEST(current_subject) != first_char)
1.1       misho    3483:             current_subject++;
                   3484:         }
                   3485: 
                   3486:       /* Or to just after a linebreak for a multiline match if possible */
                   3487: 
                   3488:       else if (startline)
                   3489:         {
                   3490:         if (current_subject > md->start_subject + start_offset)
                   3491:           {
1.1.1.2   misho    3492: #ifdef SUPPORT_UTF
                   3493:           if (utf)
1.1       misho    3494:             {
                   3495:             while (current_subject < end_subject &&
                   3496:                    !WAS_NEWLINE(current_subject))
                   3497:               {
                   3498:               current_subject++;
1.1.1.2   misho    3499:               ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3500:                 current_subject++);
1.1       misho    3501:               }
                   3502:             }
                   3503:           else
                   3504: #endif
                   3505:           while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
                   3506:             current_subject++;
                   3507: 
                   3508:           /* If we have just passed a CR and the newline option is ANY or
                   3509:           ANYCRLF, and we are now at a LF, advance the match position by one
                   3510:           more character. */
                   3511: 
1.1.1.4   misho    3512:           if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
1.1       misho    3513:                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   3514:                current_subject < end_subject &&
1.1.1.4   misho    3515:                RAWUCHARTEST(current_subject) == CHAR_NL)
1.1       misho    3516:             current_subject++;
                   3517:           }
                   3518:         }
                   3519: 
                   3520:       /* Or to a non-unique first char after study */
                   3521: 
                   3522:       else if (start_bits != NULL)
                   3523:         {
                   3524:         while (current_subject < end_subject)
                   3525:           {
1.1.1.4   misho    3526:           register pcre_uint32 c = RAWUCHARTEST(current_subject);
1.1.1.2   misho    3527: #ifndef COMPILE_PCRE8
                   3528:           if (c > 255) c = 255;
                   3529: #endif
1.1       misho    3530:           if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   3531:             {
                   3532:             current_subject++;
1.1.1.2   misho    3533: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
                   3534:             /* In non 8-bit mode, the iteration will stop for
                   3535:             characters > 255 at the beginning or not stop at all. */
                   3536:             if (utf)
                   3537:               ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3538:                 current_subject++);
1.1       misho    3539: #endif
                   3540:             }
                   3541:           else break;
                   3542:           }
                   3543:         }
                   3544:       }
                   3545: 
                   3546:     /* Restore fudged end_subject */
                   3547: 
                   3548:     end_subject = save_end_subject;
                   3549: 
                   3550:     /* The following two optimizations are disabled for partial matching or if
                   3551:     disabling is explicitly requested (and of course, by the test above, this
                   3552:     code is not obeyed when restarting after a partial match). */
                   3553: 
                   3554:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
                   3555:         (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
                   3556:       {
                   3557:       /* If the pattern was studied, a minimum subject length may be set. This
                   3558:       is a lower bound; no actual string of that length may actually match the
                   3559:       pattern. Although the value is, strictly, in characters, we treat it as
                   3560:       bytes to avoid spending too much time in this optimization. */
                   3561: 
                   3562:       if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   3563:           (pcre_uint32)(end_subject - current_subject) < study->minlength)
                   3564:         return PCRE_ERROR_NOMATCH;
                   3565: 
1.1.1.2   misho    3566:       /* If req_char is set, we know that that character must appear in the
                   3567:       subject for the match to succeed. If the first character is set, req_char
1.1       misho    3568:       must be later in the subject; otherwise the test starts at the match
                   3569:       point. This optimization can save a huge amount of work in patterns with
                   3570:       nested unlimited repeats that aren't going to match. Writing separate
                   3571:       code for cased/caseless versions makes it go faster, as does using an
                   3572:       autoincrement and backing off on a match.
                   3573: 
                   3574:       HOWEVER: when the subject string is very, very long, searching to its end
                   3575:       can take a long time, and give bad performance on quite ordinary
                   3576:       patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
                   3577:       string... so we don't do this when the string is sufficiently long. */
                   3578: 
1.1.1.2   misho    3579:       if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
1.1       misho    3580:         {
1.1.1.2   misho    3581:         register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
1.1       misho    3582: 
                   3583:         /* We don't need to repeat the search if we haven't yet reached the
                   3584:         place we found it at last time. */
                   3585: 
1.1.1.2   misho    3586:         if (p > req_char_ptr)
1.1       misho    3587:           {
1.1.1.2   misho    3588:           if (req_char != req_char2)
1.1       misho    3589:             {
                   3590:             while (p < end_subject)
                   3591:               {
1.1.1.4   misho    3592:               register pcre_uint32 pp = RAWUCHARINCTEST(p);
1.1.1.2   misho    3593:               if (pp == req_char || pp == req_char2) { p--; break; }
1.1       misho    3594:               }
                   3595:             }
                   3596:           else
                   3597:             {
                   3598:             while (p < end_subject)
                   3599:               {
1.1.1.4   misho    3600:               if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
1.1       misho    3601:               }
                   3602:             }
                   3603: 
                   3604:           /* If we can't find the required character, break the matching loop,
                   3605:           which will cause a return of PCRE_ERROR_NOMATCH. */
                   3606: 
                   3607:           if (p >= end_subject) break;
                   3608: 
                   3609:           /* If we have found the required character, save the point where we
                   3610:           found it, so that we don't search again next time round the loop if
                   3611:           the start hasn't passed this character yet. */
                   3612: 
1.1.1.2   misho    3613:           req_char_ptr = p;
1.1       misho    3614:           }
                   3615:         }
                   3616:       }
                   3617:     }   /* End of optimizations that are done when not restarting */
                   3618: 
                   3619:   /* OK, now we can do the business */
                   3620: 
                   3621:   md->start_used_ptr = current_subject;
                   3622:   md->recursive = NULL;
                   3623: 
                   3624:   rc = internal_dfa_exec(
                   3625:     md,                                /* fixed match data */
                   3626:     md->start_code,                    /* this subexpression's code */
                   3627:     current_subject,                   /* where we currently are */
                   3628:     start_offset,                      /* start offset in subject */
                   3629:     offsets,                           /* offset vector */
                   3630:     offsetcount,                       /* size of same */
                   3631:     workspace,                         /* workspace vector */
                   3632:     wscount,                           /* size of same */
                   3633:     0);                                /* function recurse level */
                   3634: 
                   3635:   /* Anything other than "no match" means we are done, always; otherwise, carry
                   3636:   on only if not anchored. */
                   3637: 
1.1.1.4   misho    3638:   if (rc != PCRE_ERROR_NOMATCH || anchored)
                   3639:     {
                   3640:     if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
                   3641:       {
                   3642:       offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
                   3643:       offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
                   3644:       if (offsetcount > 2)
                   3645:         offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
                   3646:       }
                   3647:     return rc;
                   3648:     }
1.1       misho    3649: 
                   3650:   /* Advance to the next subject character unless we are at the end of a line
                   3651:   and firstline is set. */
                   3652: 
                   3653:   if (firstline && IS_NEWLINE(current_subject)) break;
                   3654:   current_subject++;
1.1.1.2   misho    3655: #ifdef SUPPORT_UTF
                   3656:   if (utf)
1.1       misho    3657:     {
1.1.1.2   misho    3658:     ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3659:       current_subject++);
1.1       misho    3660:     }
1.1.1.2   misho    3661: #endif
1.1       misho    3662:   if (current_subject > end_subject) break;
                   3663: 
                   3664:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   3665:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   3666:   or ANY or ANYCRLF, advance the match position by one more character. */
                   3667: 
1.1.1.4   misho    3668:   if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
1.1       misho    3669:       current_subject < end_subject &&
1.1.1.4   misho    3670:       RAWUCHARTEST(current_subject) == CHAR_NL &&
1.1       misho    3671:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   3672:         (md->nltype == NLTYPE_ANY ||
                   3673:          md->nltype == NLTYPE_ANYCRLF ||
                   3674:          md->nllen == 2))
                   3675:     current_subject++;
                   3676: 
                   3677:   }   /* "Bumpalong" loop */
                   3678: 
                   3679: return PCRE_ERROR_NOMATCH;
                   3680: }
                   3681: 
                   3682: /* End of pcre_dfa_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>