embedaddon/pcre/pcre_dfa_exec.c - annotate

Return to pcre_dfa_exec.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre
Annotation of embedaddon/pcre/pcre_dfa_exec.c, revision 1.1.1.4

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language (but see
                      7: below for why this module is different).
                      8: 
                      9:                        Written by Philip Hazel
1.1.1.4 ! misho      10:            Copyright (c) 1997-2013 University of Cambridge
1.1       misho      11: 
                     12: -----------------------------------------------------------------------------
                     13: Redistribution and use in source and binary forms, with or without
                     14: modification, are permitted provided that the following conditions are met:
                     15: 
                     16:     * Redistributions of source code must retain the above copyright notice,
                     17:       this list of conditions and the following disclaimer.
                     18: 
                     19:     * Redistributions in binary form must reproduce the above copyright
                     20:       notice, this list of conditions and the following disclaimer in the
                     21:       documentation and/or other materials provided with the distribution.
                     22: 
                     23:     * Neither the name of the University of Cambridge nor the names of its
                     24:       contributors may be used to endorse or promote products derived from
                     25:       this software without specific prior written permission.
                     26: 
                     27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37: POSSIBILITY OF SUCH DAMAGE.
                     38: -----------------------------------------------------------------------------
                     39: */
                     40: 
                     41: /* This module contains the external function pcre_dfa_exec(), which is an
                     42: alternative matching function that uses a sort of DFA algorithm (not a true
1.1.1.3   misho      43: FSM). This is NOT Perl-compatible, but it has advantages in certain
1.1       misho      44: applications. */
                     45: 
                     46: 
                     47: /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
                     48: the performance of his patterns greatly. I could not use it as it stood, as it
                     49: was not thread safe, and made assumptions about pattern sizes. Also, it caused
                     50: test 7 to loop, and test 9 to crash with a segfault.
                     51: 
                     52: The issue is the check for duplicate states, which is done by a simple linear
                     53: search up the state list. (Grep for "duplicate" below to find the code.) For
                     54: many patterns, there will never be many states active at one time, so a simple
                     55: linear search is fine. In patterns that have many active states, it might be a
                     56: bottleneck. The suggested code used an indexing scheme to remember which states
                     57: had previously been used for each character, and avoided the linear search when
                     58: it knew there was no chance of a duplicate. This was implemented when adding
                     59: states to the state lists.
                     60: 
                     61: I wrote some thread-safe, not-limited code to try something similar at the time
                     62: of checking for duplicates (instead of when adding states), using index vectors
                     63: on the stack. It did give a 13% improvement with one specially constructed
                     64: pattern for certain subject strings, but on other strings and on many of the
                     65: simpler patterns in the test suite it did worse. The major problem, I think,
                     66: was the extra time to initialize the index. This had to be done for each call
                     67: of internal_dfa_exec(). (The supplied patch used a static vector, initialized
                     68: only once - I suspect this was the cause of the problems with the tests.)
                     69: 
                     70: Overall, I concluded that the gains in some cases did not outweigh the losses
                     71: in others, so I abandoned this code. */
                     72: 
                     73: 
                     74: 
                     75: #ifdef HAVE_CONFIG_H
                     76: #include "config.h"
                     77: #endif
                     78: 
                     79: #define NLBLOCK md             /* Block containing newline information */
                     80: #define PSSTART start_subject  /* Field containing processed string start */
                     81: #define PSEND   end_subject    /* Field containing processed string end */
                     82: 
                     83: #include "pcre_internal.h"
                     84: 
                     85: 
                     86: /* For use to indent debugging output */
                     87: 
                     88: #define SP "                   "  /* blank padding; the DPRINTF calls below print a prefix of it with %.*s */
                     89: 
                     90: 
                     91: /*************************************************
                     92: *      Code parameters and static tables         *
                     93: *************************************************/
                     94: 
                     95: /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
                     96: into others, under special conditions. A gap of 20 between the blocks should be
                     97: enough. The resulting opcodes don't have to be less than 256 because they are
                     98: never stored, so we push them well clear of the normal opcodes. */
                     99: 
                    100: #define OP_PROP_EXTRA       300  /* bases step by 20 and all sit above 255 */
                    101: #define OP_EXTUNI_EXTRA     320
                    102: #define OP_ANYNL_EXTRA      340
                    103: #define OP_HSPACE_EXTRA     360
                    104: #define OP_VSPACE_EXTRA     380
                    105: 
                    106: 
                    107: /* This table identifies those opcodes that are followed immediately by a
                    108: character that is to be tested in some way. This makes it possible to
                    109: centralize the loading of these characters. In the case of Type * etc, the
                    110: "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
                    111: small value. Non-zero values in the table are the offsets from the opcode where
                    112: the character is to be found. ***NOTE*** If the start of this table is
                    113: modified, the three tables that follow must also be modified. */
                    114: 
1.1.1.2   misho     115: static const pcre_uint8 coptable[] = {  /* 0 = no operand character; else its offset from the opcode */
1.1       misho     116:   0,                             /* End                                    */
                    117:   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
                    118:   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
                    119:   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
                    120:   0, 0,                          /* \P, \p                                 */
                    121:   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
                    122:   0,                             /* \X                                     */
                    123:   0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
                    124:   1,                             /* Char                                   */
                    125:   1,                             /* Chari                                  */
                    126:   1,                             /* not                                    */
                    127:   1,                             /* noti                                   */
                    128:   /* Positive single-char repeats                                          */
                    129:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
1.1.1.2   misho     130:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
                    131:   1+IMM2_SIZE,                   /* exact                                  */
                    132:   1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
1.1       misho     133:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
1.1.1.2   misho     134:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
                    135:   1+IMM2_SIZE,                   /* exact I                                */
                    136:   1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
1.1       misho     137:   /* Negative single-char repeats - only for chars < 256                   */
                    138:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
1.1.1.2   misho     139:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
                    140:   1+IMM2_SIZE,                   /* NOT exact                              */
                    141:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
1.1       misho     142:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
1.1.1.2   misho     143:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
                    144:   1+IMM2_SIZE,                   /* NOT exact I                            */
                    145:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
1.1       misho     146:   /* Positive type repeats                                                 */
                    147:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
1.1.1.2   misho     148:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
                    149:   1+IMM2_SIZE,                   /* Type exact                             */
                    150:   1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
1.1       misho     151:   /* Character class & ref repeats                                         */
                    152:   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
                    153:   0, 0,                          /* CRRANGE, CRMINRANGE                    */
                    154:   0,                             /* CLASS                                  */
                    155:   0,                             /* NCLASS                                 */
                    156:   0,                             /* XCLASS - variable length               */
                    157:   0,                             /* REF                                    */
                    158:   0,                             /* REFI                                   */
                    159:   0,                             /* RECURSE                                */
                    160:   0,                             /* CALLOUT                                */
                    161:   0,                             /* Alt                                    */
                    162:   0,                             /* Ket                                    */
                    163:   0,                             /* KetRmax                                */
                    164:   0,                             /* KetRmin                                */
                    165:   0,                             /* KetRpos                                */
                    166:   0,                             /* Reverse                                */
                    167:   0,                             /* Assert                                 */
                    168:   0,                             /* Assert not                             */
                    169:   0,                             /* Assert behind                          */
                    170:   0,                             /* Assert behind not                      */
                    171:   0, 0,                          /* ONCE, ONCE_NC                          */
                    172:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    173:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
                    174:   0, 0,                          /* CREF, NCREF                            */
                    175:   0, 0,                          /* RREF, NRREF                            */
                    176:   0,                             /* DEF                                    */
                    177:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    178:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    179:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    180:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    181:   0, 0                           /* CLOSE, SKIPZERO                        */
                    182: };
                    183: 
                    184: /* This table identifies those opcodes that inspect a character. It is used to
                    185: remember the fact that a character could have been inspected when the end of
                    186: the subject is reached. ***NOTE*** If the start of this table is modified, the
                    187: two tables that follow must also be modified. */
                    188: 
1.1.1.2   misho     189: static const pcre_uint8 poptable[] = {  /* 1 = opcode inspects a character, 0 = it does not */
1.1       misho     190:   0,                             /* End                                    */
                    191:   0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
                    192:   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
                    193:   1, 1, 1,                       /* Any, AllAny, Anybyte                   */
                    194:   1, 1,                          /* \P, \p                                 */
                    195:   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
                    196:   1,                             /* \X                                     */
                    197:   0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
                    198:   1,                             /* Char                                   */
                    199:   1,                             /* Chari                                  */
                    200:   1,                             /* not                                    */
                    201:   1,                             /* noti                                   */
                    202:   /* Positive single-char repeats                                          */
                    203:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    204:   1, 1, 1,                       /* upto, minupto, exact                   */
                    205:   1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
                    206:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
                    207:   1, 1, 1,                       /* upto I, minupto I, exact I             */
                    208:   1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
                    209:   /* Negative single-char repeats - only for chars < 256                   */
                    210:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
                    211:   1, 1, 1,                       /* NOT upto, minupto, exact               */
                    212:   1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
                    213:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
                    214:   1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
                    215:   1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
                    216:   /* Positive type repeats                                                 */
                    217:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
                    218:   1, 1, 1,                       /* Type upto, minupto, exact              */
                    219:   1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
                    220:   /* Character class & ref repeats                                         */
                    221:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    222:   1, 1,                          /* CRRANGE, CRMINRANGE                    */
                    223:   1,                             /* CLASS                                  */
                    224:   1,                             /* NCLASS                                 */
                    225:   1,                             /* XCLASS - variable length               */
                    226:   0,                             /* REF                                    */
                    227:   0,                             /* REFI                                   */
                    228:   0,                             /* RECURSE                                */
                    229:   0,                             /* CALLOUT                                */
                    230:   0,                             /* Alt                                    */
                    231:   0,                             /* Ket                                    */
                    232:   0,                             /* KetRmax                                */
                    233:   0,                             /* KetRmin                                */
                    234:   0,                             /* KetRpos                                */
                    235:   0,                             /* Reverse                                */
                    236:   0,                             /* Assert                                 */
                    237:   0,                             /* Assert not                             */
                    238:   0,                             /* Assert behind                          */
                    239:   0,                             /* Assert behind not                      */
                    240:   0, 0,                          /* ONCE, ONCE_NC                          */
                    241:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    242:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
                    243:   0, 0,                          /* CREF, NCREF                            */
                    244:   0, 0,                          /* RREF, NRREF                            */
                    245:   0,                             /* DEF                                    */
                    246:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    247:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    248:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    249:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    250:   0, 0                           /* CLOSE, SKIPZERO                        */
                    251: };
                    252: 
                    253: /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
                    254: and \w */
                    255: 
1.1.1.2   misho     256: static const pcre_uint8 toptable1[] = {
1.1       misho     257:   0, 0, 0, 0, 0, 0,               /* slots line up with End, \A, \G, \K, \B, \b in coptable */
                    258:   ctype_digit, ctype_digit,       /* \D, \d */
                    259:   ctype_space, ctype_space,       /* \S, \s */
                    260:   ctype_word,  ctype_word,        /* \W, \w */
                    261:   0, 0                            /* OP_ANY, OP_ALLANY */
                    262: };
                    263: 
1.1.1.2   misho     264: static const pcre_uint8 toptable2[] = {
1.1       misho     265:   0, 0, 0, 0, 0, 0,               /* slots line up with End, \A, \G, \K, \B, \b in coptable */
                    266:   ctype_digit, 0,                 /* \D, \d */
                    267:   ctype_space, 0,                 /* \S, \s */
                    268:   ctype_word,  0,                 /* \W, \w */
                    269:   1, 1                            /* OP_ANY, OP_ALLANY */
                    270: };
                    271: 
                    272: 
                    273: /* Structure for holding data about a particular state, which is in effect the
                    274: current data for an active path through the match tree. It must consist
                    275: entirely of ints because the working vector we are passed, and which we put
                    276: these structures in, is a vector of ints. */
                    277: 
                    278: typedef struct stateblock {
                    279:   int offset;                     /* Offset to opcode */
                    280:   int count;                      /* Count for repeats */
                    281:   int data;                       /* Extra per-state value (stored by ADD_ACTIVE_DATA / ADD_NEW_DATA) */
                    282: } stateblock;
                    283: 
1.1.1.3   misho     284: #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))  /* size of one stateblock, counted in ints */
1.1       misho     285: 
                    286: 
                    287: #ifdef PCRE_DEBUG
                    288: /*************************************************
                    289: *             Print character string             *
                    290: *************************************************/
                    291: 
                    292: /* Debug-only printer: printable units below 256 go out as-is, others as \x{hh}.
                    293: 
                    294: Arguments:
                    295:   p            points to string
                    296:   length       number of pcre_uchar units to print
                    297:   f            where to print
                    298: 
                    299: Returns:       nothing
                    300: */
                    301: 
                    302: static void
1.1.1.2   misho     303: pchars(const pcre_uchar *p, int length, FILE *f)
1.1       misho     304: {
1.1.1.4 ! misho     305: pcre_uint32 c;
1.1       misho     306: while (length-- > 0)
                    307:   {
                    308:   if ((c = *(p++)) < 256 && isprint((int)c))  /* isprint() is undefined for values above UCHAR_MAX */
                    309:     fprintf(f, "%c", (int)c);                 /* %c takes an int */
                    310:   else
1.1.1.4 ! misho     311:     fprintf(f, "\\x{%02x}", (unsigned int)c);  /* %x takes an unsigned int */
1.1       misho     312:   }
                    313: }
                    314: #endif
                    315: 
                    316: 
                    317: 
                    318: /*************************************************
                    319: *    Execute a Regular Expression - DFA engine   *
                    320: *************************************************/
                    321: 
                    322: /* This internal function applies a compiled pattern to a subject string,
                    323: starting at a given point, using a DFA engine. This function is called from the
                    324: external one, possibly multiple times if the pattern is not anchored. The
                    325: function calls itself recursively for some kinds of subpattern.
                    326: 
                    327: Arguments:
                    328:   md                the match_data block with fixed information
                    329:   this_start_code   the opening bracket of this subexpression's code
                    330:   current_subject   where we currently are in the subject string
                    331:   start_offset      start offset in the subject string
                    332:   offsets           vector to contain the matching string offsets
                    333:   offsetcount       size of same
                    334:   workspace         vector of workspace
                    335:   wscount           size of same
                    336:   rlevel            function call recursion level
                    337: 
                    338: Returns:            > 0 => number of match offset pairs placed in offsets
                    339:                     = 0 => offsets overflowed; longest matches are present
                    340:                      -1 => failed to match
                    341:                    < -1 => some kind of unexpected problem
                    342: 
                    343: The following macros are used for adding states to the two state vectors (one
                    344: for the current character, one for the following character). */
                    345: 
                    346: #define ADD_ACTIVE(x,y) /* append a state: offset x, count y; data is not written */ \
                    347:   if (active_count++ < wscount) \
                    348:     { \
                    349:     next_active_state->offset = (x); \
                    350:     next_active_state->count  = (y); \
                    351:     next_active_state++; \
                    352:     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    353:     } \
                    354:   else return PCRE_ERROR_DFA_WSSIZE  /* active_count reached wscount: the function using the macro returns */
                    355: 
                    356: #define ADD_ACTIVE_DATA(x,y,z) /* as ADD_ACTIVE, and also stores z in the data field */ \
                    357:   if (active_count++ < wscount) \
                    358:     { \
                    359:     next_active_state->offset = (x); \
                    360:     next_active_state->count  = (y); \
                    361:     next_active_state->data   = (z); \
                    362:     next_active_state++; \
                    363:     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
                    364:     } \
                    365:   else return PCRE_ERROR_DFA_WSSIZE  /* active_count reached wscount: the function using the macro returns */
                    366: 
                    367: #define ADD_NEW(x,y) /* append a state via next_new_state: offset x, count y; data is not written */ \
                    368:   if (new_count++ < wscount) \
                    369:     { \
                    370:     next_new_state->offset = (x); \
                    371:     next_new_state->count  = (y); \
                    372:     next_new_state++; \
                    373:     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    374:     } \
                    375:   else return PCRE_ERROR_DFA_WSSIZE  /* new_count reached wscount: the function using the macro returns */
                    376: 
                    377: #define ADD_NEW_DATA(x,y,z) /* as ADD_NEW, and also stores z in the data field */ \
                    378:   if (new_count++ < wscount) \
                    379:     { \
                    380:     next_new_state->offset = (x); \
                    381:     next_new_state->count  = (y); \
                    382:     next_new_state->data   = (z); \
                    383:     next_new_state++; \
1.1.1.3   misho     384:     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
                    385:       (x), (y), (z), __LINE__)); \
1.1       misho     386:     } \
                    387:   else return PCRE_ERROR_DFA_WSSIZE  /* new_count reached wscount: the function using the macro returns */
                    388: 
                    389: /* And now, here is the code */
                    390: 
                    391: static int
                    392: internal_dfa_exec(
                    393:   dfa_match_data *md,
1.1.1.2   misho     394:   const pcre_uchar *this_start_code,
                    395:   const pcre_uchar *current_subject,
1.1       misho     396:   int start_offset,
                    397:   int *offsets,
                    398:   int offsetcount,
                    399:   int *workspace,
                    400:   int wscount,
                    401:   int  rlevel)
                    402: {
                    403: stateblock *active_states, *new_states, *temp_states;
                    404: stateblock *next_active_state, *next_new_state;
                    405: 
1.1.1.2   misho     406: const pcre_uint8 *ctypes, *lcc, *fcc;
                    407: const pcre_uchar *ptr;
                    408: const pcre_uchar *end_code, *first_op;
1.1       misho     409: 
                    410: dfa_recursion_info new_recursive;
                    411: 
                    412: int active_count, new_count, match_count;
                    413: 
                    414: /* Some fields in the md block are frequently referenced, so we load them into
                    415: independent variables in the hope that this will perform better. */
                    416: 
1.1.1.2   misho     417: const pcre_uchar *start_subject = md->start_subject;
                    418: const pcre_uchar *end_subject = md->end_subject;
                    419: const pcre_uchar *start_code = md->start_code;
1.1       misho     420: 
1.1.1.2   misho     421: #ifdef SUPPORT_UTF
                    422: BOOL utf = (md->poptions & PCRE_UTF8) != 0;
1.1       misho     423: #else
1.1.1.2   misho     424: BOOL utf = FALSE;
1.1       misho     425: #endif
                    426: 
1.1.1.3   misho     427: BOOL reset_could_continue = FALSE;
                    428: 
1.1       misho     429: rlevel++;
                    430: offsetcount &= (-2);
                    431: 
                    432: wscount -= 2;
                    433: wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
                    434:           (2 * INTS_PER_STATEBLOCK);
                    435: 
                    436: DPRINTF(("\n%.*s---------------------\n"
                    437:   "%.*sCall to internal_dfa_exec f=%d\n",
                    438:   rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
                    439: 
                    440: ctypes = md->tables + ctypes_offset;
                    441: lcc = md->tables + lcc_offset;
                    442: fcc = md->tables + fcc_offset;
                    443: 
                    444: match_count = PCRE_ERROR_NOMATCH;   /* A negative number */
                    445: 
                    446: active_states = (stateblock *)(workspace + 2);
                    447: next_new_state = new_states = active_states + wscount;
                    448: new_count = 0;
                    449: 
                    450: first_op = this_start_code + 1 + LINK_SIZE +
                    451:   ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
1.1.1.2   misho     452:     *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
                    453:     ? IMM2_SIZE:0);
1.1       misho     454: 
                    455: /* The first thing in any (sub) pattern is a bracket of some sort. Push all
                    456: the alternative states onto the list, and find out where the end is. This
                    457: makes it possible to use this function recursively, when we want to stop at a
                    458: matching internal ket rather than at the end.
                    459: 
                    460: If the first opcode in the first alternative is OP_REVERSE, we are dealing with
                    461: a backward assertion. In that case, we have to find out the maximum amount to
                    462: move back, and set up each alternative appropriately. */
                    463: 
                    464: if (*first_op == OP_REVERSE)
                    465:   {
                    466:   int max_back = 0;
                    467:   int gone_back;
                    468: 
                    469:   end_code = this_start_code;
                    470:   do
                    471:     {
                    472:     int back = GET(end_code, 2+LINK_SIZE);
                    473:     if (back > max_back) max_back = back;
                    474:     end_code += GET(end_code, 1);
                    475:     }
                    476:   while (*end_code == OP_ALT);
                    477: 
                    478:   /* If we can't go back the amount required for the longest lookbehind
                    479:   pattern, go back as far as we can; some alternatives may still be viable. */
                    480: 
1.1.1.2   misho     481: #ifdef SUPPORT_UTF
1.1       misho     482:   /* In character mode we have to step back character by character */
                    483: 
1.1.1.2   misho     484:   if (utf)
1.1       misho     485:     {
                    486:     for (gone_back = 0; gone_back < max_back; gone_back++)
                    487:       {
                    488:       if (current_subject <= start_subject) break;
                    489:       current_subject--;
1.1.1.2   misho     490:       ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
1.1       misho     491:       }
                    492:     }
                    493:   else
                    494: #endif
                    495: 
                    496:   /* In byte-mode we can do this quickly. */
                    497: 
                    498:     {
                    499:     gone_back = (current_subject - max_back < start_subject)?
                    500:       (int)(current_subject - start_subject) : max_back;
                    501:     current_subject -= gone_back;
                    502:     }
                    503: 
                    504:   /* Save the earliest consulted character */
                    505: 
                    506:   if (current_subject < md->start_used_ptr)
                    507:     md->start_used_ptr = current_subject;
                    508: 
                    509:   /* Now we can process the individual branches. */
                    510: 
                    511:   end_code = this_start_code;
                    512:   do
                    513:     {
                    514:     int back = GET(end_code, 2+LINK_SIZE);
                    515:     if (back <= gone_back)
                    516:       {
                    517:       int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
                    518:       ADD_NEW_DATA(-bstate, 0, gone_back - back);
                    519:       }
                    520:     end_code += GET(end_code, 1);
                    521:     }
                    522:   while (*end_code == OP_ALT);
                    523:  }
                    524: 
                    525: /* This is the code for a "normal" subpattern (not a backward assertion). The
                    526: start of a whole pattern is always one of these. If we are at the top level,
                    527: we may be asked to restart matching from the same point that we reached for a
                    528: previous partial match. We still have to scan through the top-level branches to
                    529: find the end state. */
                    530: 
                    531: else
                    532:   {
                    533:   end_code = this_start_code;
                    534: 
                    535:   /* Restarting */
                    536: 
                    537:   if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
                    538:     {
                    539:     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
                    540:     new_count = workspace[1];
                    541:     if (!workspace[0])
                    542:       memcpy(new_states, active_states, new_count * sizeof(stateblock));
                    543:     }
                    544: 
                    545:   /* Not restarting */
                    546: 
                    547:   else
                    548:     {
                    549:     int length = 1 + LINK_SIZE +
                    550:       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
1.1.1.2   misho     551:         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
                    552:         ? IMM2_SIZE:0);
1.1       misho     553:     do
                    554:       {
                    555:       ADD_NEW((int)(end_code - start_code + length), 0);
                    556:       end_code += GET(end_code, 1);
                    557:       length = 1 + LINK_SIZE;
                    558:       }
                    559:     while (*end_code == OP_ALT);
                    560:     }
                    561:   }
                    562: 
                    563: workspace[0] = 0;    /* Bit indicating which vector is current */
                    564: 
1.1.1.2   misho     565: DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
1.1       misho     566: 
                    567: /* Loop for scanning the subject */
                    568: 
                    569: ptr = current_subject;
                    570: for (;;)
                    571:   {
                    572:   int i, j;
                    573:   int clen, dlen;
1.1.1.4 ! misho     574:   pcre_uint32 c, d;
1.1       misho     575:   int forced_fail = 0;
1.1.1.3   misho     576:   BOOL partial_newline = FALSE;
                    577:   BOOL could_continue = reset_could_continue;
                    578:   reset_could_continue = FALSE;
1.1       misho     579: 
                    580:   /* Make the new state list into the active state list and empty the
                    581:   new state list. */
                    582: 
                    583:   temp_states = active_states;
                    584:   active_states = new_states;
                    585:   new_states = temp_states;
                    586:   active_count = new_count;
                    587:   new_count = 0;
                    588: 
                    589:   workspace[0] ^= 1;              /* Remember for the restarting feature */
                    590:   workspace[1] = active_count;
                    591: 
                    592: #ifdef PCRE_DEBUG
                    593:   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
1.1.1.2   misho     594:   pchars(ptr, STRLEN_UC(ptr), stdout);
1.1       misho     595:   printf("\"\n");
                    596: 
                    597:   printf("%.*sActive states: ", rlevel*2-2, SP);
                    598:   for (i = 0; i < active_count; i++)
                    599:     printf("%d/%d ", active_states[i].offset, active_states[i].count);
                    600:   printf("\n");
                    601: #endif
                    602: 
                    603:   /* Set the pointers for adding new states */
                    604: 
                    605:   next_active_state = active_states + active_count;
                    606:   next_new_state = new_states;
                    607: 
                    608:   /* Load the current character from the subject outside the loop, as many
                    609:   different states may want to look at it, and we assume that at least one
                    610:   will. */
                    611: 
                    612:   if (ptr < end_subject)
                    613:     {
1.1.1.3   misho     614:     clen = 1;        /* Number of data items in the character */
1.1.1.2   misho     615: #ifdef SUPPORT_UTF
1.1.1.4 ! misho     616:     GETCHARLENTEST(c, ptr, clen);
        !           617: #else
1.1       misho     618:     c = *ptr;
1.1.1.4 ! misho     619: #endif  /* SUPPORT_UTF */
1.1       misho     620:     }
                    621:   else
                    622:     {
                    623:     clen = 0;        /* This indicates the end of the subject */
                    624:     c = NOTACHAR;    /* This value should never actually be used */
                    625:     }
                    626: 
                    627:   /* Scan up the active states and act on each one. The result of an action
                    628:   may be to add more states to the currently active list (e.g. on hitting a
                    629:   parenthesis) or it may be to put states on the new list, for considering
                    630:   when we move the character pointer on. */
                    631: 
                    632:   for (i = 0; i < active_count; i++)
                    633:     {
                    634:     stateblock *current_state = active_states + i;
                    635:     BOOL caseless = FALSE;
1.1.1.2   misho     636:     const pcre_uchar *code;
1.1       misho     637:     int state_offset = current_state->offset;
1.1.1.4 ! misho     638:     int codevalue, rrc;
        !           639:     int count;
1.1       misho     640: 
                    641: #ifdef PCRE_DEBUG
                    642:     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
                    643:     if (clen == 0) printf("EOL\n");
                    644:       else if (c > 32 && c < 127) printf("'%c'\n", c);
                    645:         else printf("0x%02x\n", c);
                    646: #endif
                    647: 
                    648:     /* A negative offset is a special case meaning "hold off going to this
                    649:     (negated) state until the number of characters in the data field have
1.1.1.3   misho     650:     been skipped". If the could_continue flag was passed over from a previous
                    651:     state, arrange for it to be passed on. */
1.1       misho     652: 
                    653:     if (state_offset < 0)
                    654:       {
                    655:       if (current_state->data > 0)
                    656:         {
                    657:         DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
                    658:         ADD_NEW_DATA(state_offset, current_state->count,
                    659:           current_state->data - 1);
1.1.1.3   misho     660:         if (could_continue) reset_could_continue = TRUE;
1.1       misho     661:         continue;
                    662:         }
                    663:       else
                    664:         {
                    665:         current_state->offset = state_offset = -state_offset;
                    666:         }
                    667:       }
                    668: 
                    669:     /* Check for a duplicate state with the same count, and skip if found.
                    670:     See the note at the head of this module about the possibility of improving
                    671:     performance here. */
                    672: 
                    673:     for (j = 0; j < i; j++)
                    674:       {
                    675:       if (active_states[j].offset == state_offset &&
                    676:           active_states[j].count == current_state->count)
                    677:         {
                    678:         DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
                    679:         goto NEXT_ACTIVE_STATE;
                    680:         }
                    681:       }
                    682: 
                    683:     /* The state offset is the offset to the opcode */
                    684: 
                    685:     code = start_code + state_offset;
                    686:     codevalue = *code;
                    687: 
                    688:     /* If this opcode inspects a character, but we are at the end of the
                    689:     subject, remember the fact for use when testing for a partial match. */
                    690: 
                    691:     if (clen == 0 && poptable[codevalue] != 0)
                    692:       could_continue = TRUE;
                    693: 
                    694:     /* If this opcode is followed by an inline character, load it. It is
                    695:     tempting to test for the presence of a subject character here, but that
                    696:     is wrong, because sometimes zero repetitions of the subject are
                    697:     permitted.
                    698: 
                    699:     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
1.1.1.3   misho     700:     argument that is not a data character - but is always one byte long because
                    701:     the values are small. We have to take special action to deal with  \P, \p,
                    702:     \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
                    703:     these ones to new opcodes. */
1.1       misho     704: 
                    705:     if (coptable[codevalue] > 0)
                    706:       {
                    707:       dlen = 1;
1.1.1.2   misho     708: #ifdef SUPPORT_UTF
                    709:       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
                    710: #endif  /* SUPPORT_UTF */
1.1       misho     711:       d = code[coptable[codevalue]];
                    712:       if (codevalue >= OP_TYPESTAR)
                    713:         {
                    714:         switch(d)
                    715:           {
                    716:           case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
                    717:           case OP_NOTPROP:
                    718:           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
                    719:           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
                    720:           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
                    721:           case OP_NOT_HSPACE:
                    722:           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
                    723:           case OP_NOT_VSPACE:
                    724:           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
                    725:           default: break;
                    726:           }
                    727:         }
                    728:       }
                    729:     else
                    730:       {
                    731:       dlen = 0;         /* Not strictly necessary, but compilers moan */
                    732:       d = NOTACHAR;     /* if these variables are not set. */
                    733:       }
                    734: 
                    735: 
                    736:     /* Now process the individual opcodes */
                    737: 
                    738:     switch (codevalue)
                    739:       {
                    740: /* ========================================================================== */
                    741:       /* These cases are never obeyed. This is a fudge that causes a compile-
                    742:       time error if the vectors coptable or poptable, which are indexed by
                    743:       opcode, are not the correct length. It seems to be the only way to do
                    744:       such a check at compile time, as the sizeof() operator does not work
                    745:       in the C preprocessor. */
                    746: 
                    747:       case OP_TABLE_LENGTH:
                    748:       case OP_TABLE_LENGTH +
                    749:         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
                    750:          (sizeof(poptable) == OP_TABLE_LENGTH)):
                    751:       break;
                    752: 
                    753: /* ========================================================================== */
                    754:       /* Reached a closing bracket. If not at the end of the pattern, carry
                    755:       on with the next opcode. For repeating opcodes, also add the repeat
                    756:       state. Note that KETRPOS will always be encountered at the end of the
                    757:       subpattern, because the possessive subpattern repeats are always handled
                    758:       using recursive calls. Thus, it never adds any new states.
                    759: 
                    760:       At the end of the (sub)pattern, unless we have an empty string and
                    761:       PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
                    762:       start of the subject, save the match data, shifting up all previous
                    763:       matches so we always have the longest first. */
                    764: 
                    765:       case OP_KET:
                    766:       case OP_KETRMIN:
                    767:       case OP_KETRMAX:
                    768:       case OP_KETRPOS:
                    769:       if (code != end_code)
                    770:         {
                    771:         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
                    772:         if (codevalue != OP_KET)
                    773:           {
                    774:           ADD_ACTIVE(state_offset - GET(code, 1), 0);
                    775:           }
                    776:         }
                    777:       else
                    778:         {
                    779:         if (ptr > current_subject ||
                    780:             ((md->moptions & PCRE_NOTEMPTY) == 0 &&
                    781:               ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
                    782:                 current_subject > start_subject + md->start_offset)))
                    783:           {
                    784:           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
                    785:             else if (match_count > 0 && ++match_count * 2 > offsetcount)
                    786:               match_count = 0;
                    787:           count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
                    788:           if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
                    789:           if (offsetcount >= 2)
                    790:             {
                    791:             offsets[0] = (int)(current_subject - start_subject);
                    792:             offsets[1] = (int)(ptr - start_subject);
                    793:             DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
1.1.1.3   misho     794:               offsets[1] - offsets[0], (char *)current_subject));
1.1       misho     795:             }
                    796:           if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
                    797:             {
                    798:             DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                    799:               "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
                    800:               match_count, rlevel*2-2, SP));
                    801:             return match_count;
                    802:             }
                    803:           }
                    804:         }
                    805:       break;
                    806: 
                    807: /* ========================================================================== */
                    808:       /* These opcodes add to the current list of states without looking
                    809:       at the current character. */
                    810: 
                    811:       /*-----------------------------------------------------------------*/
                    812:       case OP_ALT:
                    813:       do { code += GET(code, 1); } while (*code == OP_ALT);
                    814:       ADD_ACTIVE((int)(code - start_code), 0);
                    815:       break;
                    816: 
                    817:       /*-----------------------------------------------------------------*/
                    818:       case OP_BRA:
                    819:       case OP_SBRA:
                    820:       do
                    821:         {
                    822:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    823:         code += GET(code, 1);
                    824:         }
                    825:       while (*code == OP_ALT);
                    826:       break;
                    827: 
                    828:       /*-----------------------------------------------------------------*/
                    829:       case OP_CBRA:
                    830:       case OP_SCBRA:
1.1.1.2   misho     831:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
1.1       misho     832:       code += GET(code, 1);
                    833:       while (*code == OP_ALT)
                    834:         {
                    835:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
                    836:         code += GET(code, 1);
                    837:         }
                    838:       break;
                    839: 
                    840:       /*-----------------------------------------------------------------*/
                    841:       case OP_BRAZERO:
                    842:       case OP_BRAMINZERO:
                    843:       ADD_ACTIVE(state_offset + 1, 0);
                    844:       code += 1 + GET(code, 2);
                    845:       while (*code == OP_ALT) code += GET(code, 1);
                    846:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    847:       break;
                    848: 
                    849:       /*-----------------------------------------------------------------*/
                    850:       case OP_SKIPZERO:
                    851:       code += 1 + GET(code, 2);
                    852:       while (*code == OP_ALT) code += GET(code, 1);
                    853:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    854:       break;
                    855: 
                    856:       /*-----------------------------------------------------------------*/
                    857:       case OP_CIRC:
                    858:       if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
                    859:         { ADD_ACTIVE(state_offset + 1, 0); }
                    860:       break;
                    861: 
                    862:       /*-----------------------------------------------------------------*/
                    863:       case OP_CIRCM:
                    864:       if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
                    865:           (ptr != end_subject && WAS_NEWLINE(ptr)))
                    866:         { ADD_ACTIVE(state_offset + 1, 0); }
                    867:       break;
                    868: 
                    869:       /*-----------------------------------------------------------------*/
                    870:       case OP_EOD:
                    871:       if (ptr >= end_subject)
                    872:         {
                    873:         if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    874:           could_continue = TRUE;
                    875:         else { ADD_ACTIVE(state_offset + 1, 0); }
                    876:         }
                    877:       break;
                    878: 
                    879:       /*-----------------------------------------------------------------*/
                    880:       case OP_SOD:
                    881:       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
                    882:       break;
                    883: 
                    884:       /*-----------------------------------------------------------------*/
                    885:       case OP_SOM:
                    886:       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
                    887:       break;
                    888: 
                    889: 
                    890: /* ========================================================================== */
                    891:       /* These opcodes inspect the next subject character, and sometimes
                    892:       the previous one as well, but do not have an argument. The variable
                    893:       clen contains the length of the current character and is zero if we are
                    894:       at the end of the subject. */
                    895: 
                    896:       /*-----------------------------------------------------------------*/
                    897:       case OP_ANY:
                    898:       if (clen > 0 && !IS_NEWLINE(ptr))
1.1.1.3   misho     899:         {
                    900:         if (ptr + 1 >= md->end_subject &&
                    901:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                    902:             NLBLOCK->nltype == NLTYPE_FIXED &&
                    903:             NLBLOCK->nllen == 2 &&
                    904:             c == NLBLOCK->nl[0])
                    905:           {
                    906:           could_continue = partial_newline = TRUE;
                    907:           }
                    908:         else
                    909:           {
                    910:           ADD_NEW(state_offset + 1, 0);
                    911:           }
                    912:         }
1.1       misho     913:       break;
                    914: 
                    915:       /*-----------------------------------------------------------------*/
                    916:       case OP_ALLANY:
                    917:       if (clen > 0)
                    918:         { ADD_NEW(state_offset + 1, 0); }
                    919:       break;
                    920: 
                    921:       /*-----------------------------------------------------------------*/
                    922:       case OP_EODN:
                    923:       if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    924:         could_continue = TRUE;
                    925:       else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
                    926:         { ADD_ACTIVE(state_offset + 1, 0); }
                    927:       break;
                    928: 
                    929:       /*-----------------------------------------------------------------*/
                    930:       case OP_DOLL:
                    931:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    932:         {
                    933:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    934:           could_continue = TRUE;
                    935:         else if (clen == 0 ||
                    936:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
                    937:                (ptr == end_subject - md->nllen)
                    938:             ))
                    939:           { ADD_ACTIVE(state_offset + 1, 0); }
1.1.1.3   misho     940:         else if (ptr + 1 >= md->end_subject &&
                    941:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
                    942:                  NLBLOCK->nltype == NLTYPE_FIXED &&
                    943:                  NLBLOCK->nllen == 2 &&
                    944:                  c == NLBLOCK->nl[0])
                    945:           {
                    946:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    947:             {
                    948:             reset_could_continue = TRUE;
                    949:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                    950:             }
                    951:           else could_continue = partial_newline = TRUE;
                    952:           }
1.1       misho     953:         }
                    954:       break;
                    955: 
                    956:       /*-----------------------------------------------------------------*/
                    957:       case OP_DOLLM:
                    958:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    959:         {
                    960:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    961:           could_continue = TRUE;
                    962:         else if (clen == 0 ||
                    963:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
                    964:           { ADD_ACTIVE(state_offset + 1, 0); }
1.1.1.3   misho     965:         else if (ptr + 1 >= md->end_subject &&
                    966:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
                    967:                  NLBLOCK->nltype == NLTYPE_FIXED &&
                    968:                  NLBLOCK->nllen == 2 &&
                    969:                  c == NLBLOCK->nl[0])
                    970:           {
                    971:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    972:             {
                    973:             reset_could_continue = TRUE;
                    974:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                    975:             }
                    976:           else could_continue = partial_newline = TRUE;
                    977:           }
1.1       misho     978:         }
                    979:       else if (IS_NEWLINE(ptr))
                    980:         { ADD_ACTIVE(state_offset + 1, 0); }
                    981:       break;
                    982: 
                    983:       /*-----------------------------------------------------------------*/
                    984: 
                    985:       case OP_DIGIT:
                    986:       case OP_WHITESPACE:
                    987:       case OP_WORDCHAR:
                    988:       if (clen > 0 && c < 256 &&
                    989:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
                    990:         { ADD_NEW(state_offset + 1, 0); }
                    991:       break;
                    992: 
                    993:       /*-----------------------------------------------------------------*/
                    994:       case OP_NOT_DIGIT:
                    995:       case OP_NOT_WHITESPACE:
                    996:       case OP_NOT_WORDCHAR:
                    997:       if (clen > 0 && (c >= 256 ||
                    998:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
                    999:         { ADD_NEW(state_offset + 1, 0); }
                   1000:       break;
                   1001: 
                   1002:       /*-----------------------------------------------------------------*/
                   1003:       case OP_WORD_BOUNDARY:
                   1004:       case OP_NOT_WORD_BOUNDARY:
                   1005:         {
                   1006:         int left_word, right_word;
                   1007: 
                   1008:         if (ptr > start_subject)
                   1009:           {
1.1.1.2   misho    1010:           const pcre_uchar *temp = ptr - 1;
1.1       misho    1011:           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1.1.1.4 ! misho    1012: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.2   misho    1013:           if (utf) { BACKCHAR(temp); }
1.1       misho    1014: #endif
                   1015:           GETCHARTEST(d, temp);
                   1016: #ifdef SUPPORT_UCP
                   1017:           if ((md->poptions & PCRE_UCP) != 0)
                   1018:             {
                   1019:             if (d == '_') left_word = TRUE; else
                   1020:               {
                   1021:               int cat = UCD_CATEGORY(d);
                   1022:               left_word = (cat == ucp_L || cat == ucp_N);
                   1023:               }
                   1024:             }
                   1025:           else
                   1026: #endif
                   1027:           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
                   1028:           }
                   1029:         else left_word = FALSE;
                   1030: 
                   1031:         if (clen > 0)
                   1032:           {
                   1033: #ifdef SUPPORT_UCP
                   1034:           if ((md->poptions & PCRE_UCP) != 0)
                   1035:             {
                   1036:             if (c == '_') right_word = TRUE; else
                   1037:               {
                   1038:               int cat = UCD_CATEGORY(c);
                   1039:               right_word = (cat == ucp_L || cat == ucp_N);
                   1040:               }
                   1041:             }
                   1042:           else
                   1043: #endif
                   1044:           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
                   1045:           }
                   1046:         else right_word = FALSE;
                   1047: 
                   1048:         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
                   1049:           { ADD_ACTIVE(state_offset + 1, 0); }
                   1050:         }
                   1051:       break;
                   1052: 
                   1053: 
                   1054:       /*-----------------------------------------------------------------*/
                   1055:       /* Check the next character by Unicode property. We will get here only
                   1056:       if the support is in the binary; otherwise a compile-time error occurs.
                   1057:       */
                   1058: 
                   1059: #ifdef SUPPORT_UCP
                   1060:       case OP_PROP:
                   1061:       case OP_NOTPROP:
                   1062:       if (clen > 0)
                   1063:         {
                   1064:         BOOL OK;
1.1.1.4 ! misho    1065:         const pcre_uint32 *cp;
1.1       misho    1066:         const ucd_record * prop = GET_UCD(c);
                   1067:         switch(code[1])
                   1068:           {
                   1069:           case PT_ANY:
                   1070:           OK = TRUE;
                   1071:           break;
                   1072: 
                   1073:           case PT_LAMP:
                   1074:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1075:                prop->chartype == ucp_Lt;
                   1076:           break;
                   1077: 
                   1078:           case PT_GC:
1.1.1.2   misho    1079:           OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1.1       misho    1080:           break;
                   1081: 
                   1082:           case PT_PC:
                   1083:           OK = prop->chartype == code[2];
                   1084:           break;
                   1085: 
                   1086:           case PT_SC:
                   1087:           OK = prop->script == code[2];
                   1088:           break;
                   1089: 
                   1090:           /* These are specials for combination cases. */
                   1091: 
                   1092:           case PT_ALNUM:
1.1.1.2   misho    1093:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1094:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1095:           break;
                   1096: 
                   1097:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1098:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1099:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1100:           break;
                   1101: 
                   1102:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1103:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1104:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1105:                c == CHAR_FF || c == CHAR_CR;
                   1106:           break;
                   1107: 
                   1108:           case PT_WORD:
1.1.1.2   misho    1109:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1110:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1111:                c == CHAR_UNDERSCORE;
                   1112:           break;
                   1113: 
1.1.1.4 ! misho    1114:           case PT_CLIST:
        !          1115:           cp = PRIV(ucd_caseless_sets) + code[2];
        !          1116:           for (;;)
        !          1117:             {
        !          1118:             if (c < *cp) { OK = FALSE; break; }
        !          1119:             if (c == *cp++) { OK = TRUE; break; }
        !          1120:             }
        !          1121:           break;
        !          1122: 
        !          1123:           case PT_UCNC:
        !          1124:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          1125:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          1126:                c >= 0xe000;
        !          1127:           break;
        !          1128: 
1.1       misho    1129:           /* Should never occur, but keep compilers from grumbling. */
                   1130: 
                   1131:           default:
                   1132:           OK = codevalue != OP_PROP;
                   1133:           break;
                   1134:           }
                   1135: 
                   1136:         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
                   1137:         }
                   1138:       break;
                   1139: #endif
                   1140: 
                   1141: 
                   1142: 
                   1143: /* ========================================================================== */
                   1144:       /* These opcodes likewise inspect the subject character, but have an
                   1145:       argument that is not a data character. It is one of these opcodes:
                   1146:       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
                   1147:       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
                   1148: 
                   1149:       case OP_TYPEPLUS:
                   1150:       case OP_TYPEMINPLUS:
                   1151:       case OP_TYPEPOSPLUS:
                   1152:       count = current_state->count;  /* Already matched */
                   1153:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1154:       if (clen > 0)
                   1155:         {
1.1.1.3   misho    1156:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1157:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1158:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1159:             NLBLOCK->nllen == 2 &&
                   1160:             c == NLBLOCK->nl[0])
                   1161:           {
                   1162:           could_continue = partial_newline = TRUE;
                   1163:           }
                   1164:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1165:             (c < 256 &&
                   1166:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1167:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1168:           {
                   1169:           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
                   1170:             {
                   1171:             active_count--;            /* Remove non-match possibility */
                   1172:             next_active_state--;
                   1173:             }
                   1174:           count++;
                   1175:           ADD_NEW(state_offset, count);
                   1176:           }
                   1177:         }
                   1178:       break;
                   1179: 
                   1180:       /*-----------------------------------------------------------------*/
                   1181:       case OP_TYPEQUERY:
                   1182:       case OP_TYPEMINQUERY:
                   1183:       case OP_TYPEPOSQUERY:
                   1184:       ADD_ACTIVE(state_offset + 2, 0);
                   1185:       if (clen > 0)
                   1186:         {
1.1.1.3   misho    1187:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1188:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1189:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1190:             NLBLOCK->nllen == 2 &&
                   1191:             c == NLBLOCK->nl[0])
                   1192:           {
                   1193:           could_continue = partial_newline = TRUE;
                   1194:           }
                   1195:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1196:             (c < 256 &&
                   1197:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1198:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1199:           {
                   1200:           if (codevalue == OP_TYPEPOSQUERY)
                   1201:             {
                   1202:             active_count--;            /* Remove non-match possibility */
                   1203:             next_active_state--;
                   1204:             }
                   1205:           ADD_NEW(state_offset + 2, 0);
                   1206:           }
                   1207:         }
                   1208:       break;
                   1209: 
                   1210:       /*-----------------------------------------------------------------*/
                   1211:       case OP_TYPESTAR:
                   1212:       case OP_TYPEMINSTAR:
                   1213:       case OP_TYPEPOSSTAR:
                   1214:       ADD_ACTIVE(state_offset + 2, 0);
                   1215:       if (clen > 0)
                   1216:         {
1.1.1.3   misho    1217:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1218:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1219:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1220:             NLBLOCK->nllen == 2 &&
                   1221:             c == NLBLOCK->nl[0])
                   1222:           {
                   1223:           could_continue = partial_newline = TRUE;
                   1224:           }
                   1225:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1226:             (c < 256 &&
                   1227:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1228:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1229:           {
                   1230:           if (codevalue == OP_TYPEPOSSTAR)
                   1231:             {
                   1232:             active_count--;            /* Remove non-match possibility */
                   1233:             next_active_state--;
                   1234:             }
                   1235:           ADD_NEW(state_offset, 0);
                   1236:           }
                   1237:         }
                   1238:       break;
                   1239: 
                   1240:       /*-----------------------------------------------------------------*/
                   1241:       case OP_TYPEEXACT:
                   1242:       count = current_state->count;  /* Number already matched */
                   1243:       if (clen > 0)
                   1244:         {
1.1.1.3   misho    1245:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1246:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1247:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1248:             NLBLOCK->nllen == 2 &&
                   1249:             c == NLBLOCK->nl[0])
                   1250:           {
                   1251:           could_continue = partial_newline = TRUE;
                   1252:           }
                   1253:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1254:             (c < 256 &&
                   1255:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1256:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1257:           {
1.1.1.4 ! misho    1258:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1259:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1.1       misho    1260:           else
                   1261:             { ADD_NEW(state_offset, count); }
                   1262:           }
                   1263:         }
                   1264:       break;
                   1265: 
                   1266:       /*-----------------------------------------------------------------*/
                   1267:       case OP_TYPEUPTO:
                   1268:       case OP_TYPEMINUPTO:
                   1269:       case OP_TYPEPOSUPTO:
1.1.1.2   misho    1270:       ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1.1       misho    1271:       count = current_state->count;  /* Number already matched */
                   1272:       if (clen > 0)
                   1273:         {
1.1.1.3   misho    1274:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
                   1275:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
                   1276:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   1277:             NLBLOCK->nllen == 2 &&
                   1278:             c == NLBLOCK->nl[0])
                   1279:           {
                   1280:           could_continue = partial_newline = TRUE;
                   1281:           }
                   1282:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1283:             (c < 256 &&
                   1284:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1285:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1286:           {
                   1287:           if (codevalue == OP_TYPEPOSUPTO)
                   1288:             {
                   1289:             active_count--;           /* Remove non-match possibility */
                   1290:             next_active_state--;
                   1291:             }
1.1.1.4 ! misho    1292:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1293:             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1294:           else
                   1295:             { ADD_NEW(state_offset, count); }
                   1296:           }
                   1297:         }
                   1298:       break;
                   1299: 
                   1300: /* ========================================================================== */
                   1301:       /* These are virtual opcodes that are used when something like OP_TYPEPLUS
                   1302:       has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, or one of the OP_VSPACE or
                   1303:       OP_HSPACE types as its argument. This keeps the code above fast for the
                   1304:       other cases. The argument is in the d variable. */
                   1305: 
                   1306: #ifdef SUPPORT_UCP
                   1307:       case OP_PROP_EXTRA + OP_TYPEPLUS:
                   1308:       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
                   1309:       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
                   1310:       count = current_state->count;           /* Already matched */
                   1311:       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
                   1312:       if (clen > 0)
                   1313:         {
                   1314:         BOOL OK;
1.1.1.4 ! misho    1315:         const pcre_uint32 *cp;
1.1       misho    1316:         const ucd_record * prop = GET_UCD(c);
                   1317:         switch(code[2])
                   1318:           {
                   1319:           case PT_ANY:
                   1320:           OK = TRUE;
                   1321:           break;
                   1322: 
                   1323:           case PT_LAMP:
                   1324:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1325:             prop->chartype == ucp_Lt;
                   1326:           break;
                   1327: 
                   1328:           case PT_GC:
1.1.1.2   misho    1329:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1.1       misho    1330:           break;
                   1331: 
                   1332:           case PT_PC:
                   1333:           OK = prop->chartype == code[3];
                   1334:           break;
                   1335: 
                   1336:           case PT_SC:
                   1337:           OK = prop->script == code[3];
                   1338:           break;
                   1339: 
                   1340:           /* These are specials for combination cases. */
                   1341: 
                   1342:           case PT_ALNUM:
1.1.1.2   misho    1343:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1344:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1345:           break;
                   1346: 
                   1347:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1348:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1349:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1350:           break;
                   1351: 
                   1352:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1353:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1354:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1355:                c == CHAR_FF || c == CHAR_CR;
                   1356:           break;
                   1357: 
                   1358:           case PT_WORD:
1.1.1.2   misho    1359:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1360:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1361:                c == CHAR_UNDERSCORE;
                   1362:           break;
                   1363: 
1.1.1.4 ! misho    1364:           case PT_CLIST:
        !          1365:           cp = PRIV(ucd_caseless_sets) + code[3];
        !          1366:           for (;;)
        !          1367:             {
        !          1368:             if (c < *cp) { OK = FALSE; break; }
        !          1369:             if (c == *cp++) { OK = TRUE; break; }
        !          1370:             }
        !          1371:           break;
        !          1372: 
        !          1373:           case PT_UCNC:
        !          1374:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          1375:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          1376:                c >= 0xe000;
        !          1377:           break;
        !          1378: 
1.1       misho    1379:           /* Should never occur, but keep compilers from grumbling. */
                   1380: 
                   1381:           default:
                   1382:           OK = codevalue != OP_PROP;
                   1383:           break;
                   1384:           }
                   1385: 
                   1386:         if (OK == (d == OP_PROP))
                   1387:           {
                   1388:           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
                   1389:             {
                   1390:             active_count--;           /* Remove non-match possibility */
                   1391:             next_active_state--;
                   1392:             }
                   1393:           count++;
                   1394:           ADD_NEW(state_offset, count);
                   1395:           }
                   1396:         }
                   1397:       break;
                   1398: 
                   1399:       /*-----------------------------------------------------------------*/
                   1400:       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
                   1401:       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
                   1402:       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
                   1403:       count = current_state->count;  /* Already matched */
                   1404:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1.1.1.4 ! misho    1405:       if (clen > 0)
1.1       misho    1406:         {
1.1.1.4 ! misho    1407:         int lgb, rgb;
1.1.1.2   misho    1408:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1409:         int ncount = 0;
                   1410:         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
                   1411:           {
                   1412:           active_count--;           /* Remove non-match possibility */
                   1413:           next_active_state--;
                   1414:           }
1.1.1.4 ! misho    1415:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    1416:         while (nptr < end_subject)
                   1417:           {
1.1.1.4 ! misho    1418:           dlen = 1;
        !          1419:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
        !          1420:           rgb = UCD_GRAPHBREAK(d);
        !          1421:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    1422:           ncount++;
1.1.1.4 ! misho    1423:           lgb = rgb;
        !          1424:           nptr += dlen;
1.1       misho    1425:           }
                   1426:         count++;
                   1427:         ADD_NEW_DATA(-state_offset, count, ncount);
                   1428:         }
                   1429:       break;
                   1430: #endif
                   1431: 
                   1432:       /*-----------------------------------------------------------------*/
                   1433:       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
                   1434:       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
                   1435:       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
                   1436:       count = current_state->count;  /* Already matched */
                   1437:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1438:       if (clen > 0)
                   1439:         {
                   1440:         int ncount = 0;
                   1441:         switch (c)
                   1442:           {
1.1.1.4 ! misho    1443:           case CHAR_VT:
        !          1444:           case CHAR_FF:
        !          1445:           case CHAR_NEL:
        !          1446: #ifndef EBCDIC
1.1       misho    1447:           case 0x2028:
                   1448:           case 0x2029:
1.1.1.4 ! misho    1449: #endif  /* Not EBCDIC */
1.1       misho    1450:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1451:           goto ANYNL01;
                   1452: 
1.1.1.4 ! misho    1453:           case CHAR_CR:
        !          1454:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1.1       misho    1455:           /* Fall through */
                   1456: 
                   1457:           ANYNL01:
1.1.1.4 ! misho    1458:           case CHAR_LF:
1.1       misho    1459:           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
                   1460:             {
                   1461:             active_count--;           /* Remove non-match possibility */
                   1462:             next_active_state--;
                   1463:             }
                   1464:           count++;
                   1465:           ADD_NEW_DATA(-state_offset, count, ncount);
                   1466:           break;
                   1467: 
                   1468:           default:
                   1469:           break;
                   1470:           }
                   1471:         }
                   1472:       break;
                   1473: 
                   1474:       /*-----------------------------------------------------------------*/
                   1475:       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
                   1476:       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
                   1477:       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1478:       count = current_state->count;  /* Already matched */
                   1479:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1480:       if (clen > 0)
                   1481:         {
                   1482:         BOOL OK;
                   1483:         switch (c)
                   1484:           {
1.1.1.4 ! misho    1485:           VSPACE_CASES:
1.1       misho    1486:           OK = TRUE;
                   1487:           break;
                   1488: 
                   1489:           default:
                   1490:           OK = FALSE;
                   1491:           break;
                   1492:           }
                   1493: 
                   1494:         if (OK == (d == OP_VSPACE))
                   1495:           {
                   1496:           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1497:             {
                   1498:             active_count--;           /* Remove non-match possibility */
                   1499:             next_active_state--;
                   1500:             }
                   1501:           count++;
                   1502:           ADD_NEW_DATA(-state_offset, count, 0);
                   1503:           }
                   1504:         }
                   1505:       break;
                   1506: 
                   1507:       /*-----------------------------------------------------------------*/
                   1508:       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
                   1509:       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
                   1510:       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1511:       count = current_state->count;  /* Already matched */
                   1512:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1513:       if (clen > 0)
                   1514:         {
                   1515:         BOOL OK;
                   1516:         switch (c)
                   1517:           {
1.1.1.4 ! misho    1518:           HSPACE_CASES:
1.1       misho    1519:           OK = TRUE;
                   1520:           break;
                   1521: 
                   1522:           default:
                   1523:           OK = FALSE;
                   1524:           break;
                   1525:           }
                   1526: 
                   1527:         if (OK == (d == OP_HSPACE))
                   1528:           {
                   1529:           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1530:             {
                   1531:             active_count--;           /* Remove non-match possibility */
                   1532:             next_active_state--;
                   1533:             }
                   1534:           count++;
                   1535:           ADD_NEW_DATA(-state_offset, count, 0);
                   1536:           }
                   1537:         }
                   1538:       break;
                   1539: 
                   1540:       /*-----------------------------------------------------------------*/
                   1541: #ifdef SUPPORT_UCP
                   1542:       case OP_PROP_EXTRA + OP_TYPEQUERY:
                   1543:       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
                   1544:       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
                   1545:       count = 4;
                   1546:       goto QS1;
                   1547: 
                   1548:       case OP_PROP_EXTRA + OP_TYPESTAR:
                   1549:       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
                   1550:       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
                   1551:       count = 0;
                   1552: 
                   1553:       QS1:
                   1554: 
                   1555:       ADD_ACTIVE(state_offset + 4, 0);
                   1556:       if (clen > 0)
                   1557:         {
                   1558:         BOOL OK;
1.1.1.4 ! misho    1559:         const pcre_uint32 *cp;
1.1       misho    1560:         const ucd_record * prop = GET_UCD(c);
                   1561:         switch(code[2])
                   1562:           {
                   1563:           case PT_ANY:
                   1564:           OK = TRUE;
                   1565:           break;
                   1566: 
                   1567:           case PT_LAMP:
                   1568:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1569:             prop->chartype == ucp_Lt;
                   1570:           break;
                   1571: 
                   1572:           case PT_GC:
1.1.1.2   misho    1573:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1.1       misho    1574:           break;
                   1575: 
                   1576:           case PT_PC:
                   1577:           OK = prop->chartype == code[3];
                   1578:           break;
                   1579: 
                   1580:           case PT_SC:
                   1581:           OK = prop->script == code[3];
                   1582:           break;
                   1583: 
                   1584:           /* These are specials for combination cases. */
                   1585: 
                   1586:           case PT_ALNUM:
1.1.1.2   misho    1587:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1588:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1589:           break;
                   1590: 
                   1591:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1592:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1593:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1594:           break;
                   1595: 
                   1596:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1597:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1598:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1599:                c == CHAR_FF || c == CHAR_CR;
                   1600:           break;
                   1601: 
                   1602:           case PT_WORD:
1.1.1.2   misho    1603:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1604:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1605:                c == CHAR_UNDERSCORE;
                   1606:           break;
                   1607: 
1.1.1.4 ! misho    1608:           case PT_CLIST:
        !          1609:           cp = PRIV(ucd_caseless_sets) + code[3];
        !          1610:           for (;;)
        !          1611:             {
        !          1612:             if (c < *cp) { OK = FALSE; break; }
        !          1613:             if (c == *cp++) { OK = TRUE; break; }
        !          1614:             }
        !          1615:           break;
        !          1616: 
        !          1617:           case PT_UCNC:
        !          1618:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          1619:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          1620:                c >= 0xe000;
        !          1621:           break;
        !          1622: 
1.1       misho    1623:           /* Should never occur, but keep compilers from grumbling. */
                   1624: 
                   1625:           default:
                   1626:           OK = codevalue != OP_PROP;
                   1627:           break;
                   1628:           }
                   1629: 
                   1630:         if (OK == (d == OP_PROP))
                   1631:           {
                   1632:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
                   1633:               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
                   1634:             {
                   1635:             active_count--;           /* Remove non-match possibility */
                   1636:             next_active_state--;
                   1637:             }
                   1638:           ADD_NEW(state_offset + count, 0);
                   1639:           }
                   1640:         }
                   1641:       break;
                   1642: 
                   1643:       /*-----------------------------------------------------------------*/
                   1644:       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
                   1645:       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
                   1646:       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
                   1647:       count = 2;
                   1648:       goto QS2;
                   1649: 
                   1650:       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
                   1651:       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
                   1652:       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
                   1653:       count = 0;
                   1654: 
                   1655:       QS2:
                   1656: 
                   1657:       ADD_ACTIVE(state_offset + 2, 0);
1.1.1.4 ! misho    1658:       if (clen > 0)
1.1       misho    1659:         {
1.1.1.4 ! misho    1660:         int lgb, rgb;
1.1.1.2   misho    1661:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1662:         int ncount = 0;
                   1663:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
                   1664:             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
                   1665:           {
                   1666:           active_count--;           /* Remove non-match possibility */
                   1667:           next_active_state--;
                   1668:           }
1.1.1.4 ! misho    1669:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    1670:         while (nptr < end_subject)
                   1671:           {
1.1.1.4 ! misho    1672:           dlen = 1;
        !          1673:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
        !          1674:           rgb = UCD_GRAPHBREAK(d);
        !          1675:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    1676:           ncount++;
1.1.1.4 ! misho    1677:           lgb = rgb;
        !          1678:           nptr += dlen;
1.1       misho    1679:           }
                   1680:         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
                   1681:         }
                   1682:       break;
                   1683: #endif
                   1684: 
                   1685:       /*-----------------------------------------------------------------*/
                   1686:       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
                   1687:       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
                   1688:       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
                   1689:       count = 2;
                   1690:       goto QS3;
                   1691: 
                   1692:       case OP_ANYNL_EXTRA + OP_TYPESTAR:
                   1693:       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
                   1694:       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
                   1695:       count = 0;
                   1696: 
                   1697:       QS3:
                   1698:       ADD_ACTIVE(state_offset + 2, 0);
                   1699:       if (clen > 0)
                   1700:         {
                   1701:         int ncount = 0;
                   1702:         switch (c)
                   1703:           {
1.1.1.4 ! misho    1704:           case CHAR_VT:
        !          1705:           case CHAR_FF:
        !          1706:           case CHAR_NEL:
        !          1707: #ifndef EBCDIC
1.1       misho    1708:           case 0x2028:
                   1709:           case 0x2029:
1.1.1.4 ! misho    1710: #endif  /* Not EBCDIC */
1.1       misho    1711:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1712:           goto ANYNL02;
                   1713: 
1.1.1.4 ! misho    1714:           case CHAR_CR:
        !          1715:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1.1       misho    1716:           /* Fall through */
                   1717: 
                   1718:           ANYNL02:
1.1.1.4 ! misho    1719:           case CHAR_LF:
1.1       misho    1720:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
                   1721:               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
                   1722:             {
                   1723:             active_count--;           /* Remove non-match possibility */
                   1724:             next_active_state--;
                   1725:             }
1.1.1.4 ! misho    1726:           ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1.1       misho    1727:           break;
                   1728: 
                   1729:           default:
                   1730:           break;
                   1731:           }
                   1732:         }
                   1733:       break;
                   1734: 
                   1735:       /*-----------------------------------------------------------------*/
                   1736:       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
                   1737:       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
                   1738:       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1739:       count = 2;
                   1740:       goto QS4;
                   1741: 
                   1742:       case OP_VSPACE_EXTRA + OP_TYPESTAR:
                   1743:       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
                   1744:       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1745:       count = 0;
                   1746: 
                   1747:       QS4:
                   1748:       ADD_ACTIVE(state_offset + 2, 0);
                   1749:       if (clen > 0)
                   1750:         {
                   1751:         BOOL OK;
                   1752:         switch (c)
                   1753:           {
1.1.1.4 ! misho    1754:           VSPACE_CASES:
1.1       misho    1755:           OK = TRUE;
                   1756:           break;
                   1757: 
                   1758:           default:
                   1759:           OK = FALSE;
                   1760:           break;
                   1761:           }
                   1762:         if (OK == (d == OP_VSPACE))
                   1763:           {
                   1764:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1765:               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1766:             {
                   1767:             active_count--;           /* Remove non-match possibility */
                   1768:             next_active_state--;
                   1769:             }
1.1.1.4 ! misho    1770:           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1.1       misho    1771:           }
                   1772:         }
                   1773:       break;
                   1774: 
                   1775:       /*-----------------------------------------------------------------*/
                   1776:       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
                   1777:       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
                   1778:       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1779:       count = 2;
                   1780:       goto QS5;
                   1781: 
                   1782:       case OP_HSPACE_EXTRA + OP_TYPESTAR:
                   1783:       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
                   1784:       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1785:       count = 0;
                   1786: 
                   1787:       QS5:
                   1788:       ADD_ACTIVE(state_offset + 2, 0);
                   1789:       if (clen > 0)
                   1790:         {
                   1791:         BOOL OK;
                   1792:         switch (c)
                   1793:           {
1.1.1.4 ! misho    1794:           HSPACE_CASES:
1.1       misho    1795:           OK = TRUE;
                   1796:           break;
                   1797: 
                   1798:           default:
                   1799:           OK = FALSE;
                   1800:           break;
                   1801:           }
                   1802: 
                   1803:         if (OK == (d == OP_HSPACE))
                   1804:           {
                   1805:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1806:               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1807:             {
                   1808:             active_count--;           /* Remove non-match possibility */
                   1809:             next_active_state--;
                   1810:             }
1.1.1.4 ! misho    1811:           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1.1       misho    1812:           }
                   1813:         }
                   1814:       break;
                   1815: 
                   1816:       /*-----------------------------------------------------------------*/
                   1817: #ifdef SUPPORT_UCP
                   1818:       case OP_PROP_EXTRA + OP_TYPEEXACT:
                   1819:       case OP_PROP_EXTRA + OP_TYPEUPTO:
                   1820:       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
                   1821:       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
                   1822:       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1823:         { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1.1       misho    1824:       count = current_state->count;  /* Number already matched */
                   1825:       if (clen > 0)
                   1826:         {
                   1827:         BOOL OK;
1.1.1.4 ! misho    1828:         const pcre_uint32 *cp;
1.1       misho    1829:         const ucd_record * prop = GET_UCD(c);
1.1.1.2   misho    1830:         switch(code[1 + IMM2_SIZE + 1])
1.1       misho    1831:           {
                   1832:           case PT_ANY:
                   1833:           OK = TRUE;
                   1834:           break;
                   1835: 
                   1836:           case PT_LAMP:
                   1837:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1838:             prop->chartype == ucp_Lt;
                   1839:           break;
                   1840: 
                   1841:           case PT_GC:
1.1.1.2   misho    1842:           OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1.1       misho    1843:           break;
                   1844: 
                   1845:           case PT_PC:
1.1.1.2   misho    1846:           OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1.1       misho    1847:           break;
                   1848: 
                   1849:           case PT_SC:
1.1.1.2   misho    1850:           OK = prop->script == code[1 + IMM2_SIZE + 2];
1.1       misho    1851:           break;
                   1852: 
                   1853:           /* These are specials for combination cases. */
                   1854: 
                   1855:           case PT_ALNUM:
1.1.1.2   misho    1856:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1857:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1858:           break;
                   1859: 
                   1860:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1861:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1862:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1863:           break;
                   1864: 
                   1865:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1866:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1867:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1868:                c == CHAR_FF || c == CHAR_CR;
                   1869:           break;
                   1870: 
                   1871:           case PT_WORD:
1.1.1.2   misho    1872:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1873:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1874:                c == CHAR_UNDERSCORE;
                   1875:           break;
                   1876: 
1.1.1.4 ! misho    1877:           case PT_CLIST:
        !          1878:           cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
        !          1879:           for (;;)
        !          1880:             {
        !          1881:             if (c < *cp) { OK = FALSE; break; }
        !          1882:             if (c == *cp++) { OK = TRUE; break; }
        !          1883:             }
        !          1884:           break;
        !          1885: 
        !          1886:           case PT_UCNC:
        !          1887:           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          1888:                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          1889:                c >= 0xe000;
        !          1890:           break;
        !          1891: 
1.1       misho    1892:           /* Should never occur, but keep compilers from grumbling. */
                   1893: 
                   1894:           default:
                   1895:           OK = codevalue != OP_PROP;
                   1896:           break;
                   1897:           }
                   1898: 
                   1899:         if (OK == (d == OP_PROP))
                   1900:           {
                   1901:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
                   1902:             {
                   1903:             active_count--;           /* Remove non-match possibility */
                   1904:             next_active_state--;
                   1905:             }
1.1.1.4 ! misho    1906:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1907:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1.1       misho    1908:           else
                   1909:             { ADD_NEW(state_offset, count); }
                   1910:           }
                   1911:         }
                   1912:       break;
                   1913: 
                   1914:       /*-----------------------------------------------------------------*/
                   1915:       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
                   1916:       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
                   1917:       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
                   1918:       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
                   1919:       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1920:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1921:       count = current_state->count;  /* Number already matched */
1.1.1.4 ! misho    1922:       if (clen > 0)
1.1       misho    1923:         {
1.1.1.4 ! misho    1924:         int lgb, rgb;
1.1.1.2   misho    1925:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1926:         int ncount = 0;
                   1927:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
                   1928:           {
                   1929:           active_count--;           /* Remove non-match possibility */
                   1930:           next_active_state--;
                   1931:           }
1.1.1.4 ! misho    1932:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    1933:         while (nptr < end_subject)
                   1934:           {
1.1.1.4 ! misho    1935:           dlen = 1;
        !          1936:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
        !          1937:           rgb = UCD_GRAPHBREAK(d);
        !          1938:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    1939:           ncount++;
1.1.1.4 ! misho    1940:           lgb = rgb;
        !          1941:           nptr += dlen;
1.1       misho    1942:           }
1.1.1.3   misho    1943:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                   1944:             reset_could_continue = TRUE;
1.1.1.4 ! misho    1945:         if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1946:           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1.1       misho    1947:         else
                   1948:           { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1949:         }
                   1950:       break;
                   1951: #endif
                   1952: 
                   1953:       /*-----------------------------------------------------------------*/
                   1954:       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
                   1955:       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
                   1956:       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
                   1957:       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
                   1958:       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1959:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1960:       count = current_state->count;  /* Number already matched */
                   1961:       if (clen > 0)
                   1962:         {
                   1963:         int ncount = 0;
                   1964:         switch (c)
                   1965:           {
1.1.1.4 ! misho    1966:           case CHAR_VT:
        !          1967:           case CHAR_FF:
        !          1968:           case CHAR_NEL:
        !          1969: #ifndef EBCDIC
1.1       misho    1970:           case 0x2028:
                   1971:           case 0x2029:
1.1.1.4 ! misho    1972: #endif  /* Not EBCDIC */
1.1       misho    1973:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1974:           goto ANYNL03;
                   1975: 
1.1.1.4 ! misho    1976:           case CHAR_CR:
        !          1977:           if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1.1       misho    1978:           /* Fall through */
                   1979: 
                   1980:           ANYNL03:
1.1.1.4 ! misho    1981:           case CHAR_LF:
1.1       misho    1982:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
                   1983:             {
                   1984:             active_count--;           /* Remove non-match possibility */
                   1985:             next_active_state--;
                   1986:             }
1.1.1.4 ! misho    1987:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    1988:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1.1       misho    1989:           else
                   1990:             { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1991:           break;
                   1992: 
                   1993:           default:
                   1994:           break;
                   1995:           }
                   1996:         }
                   1997:       break;
                   1998: 
                   1999:       /*-----------------------------------------------------------------*/
                   2000:       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
                   2001:       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
                   2002:       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
                   2003:       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
                   2004:       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    2005:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    2006:       count = current_state->count;  /* Number already matched */
                   2007:       if (clen > 0)
                   2008:         {
                   2009:         BOOL OK;
                   2010:         switch (c)
                   2011:           {
1.1.1.4 ! misho    2012:           VSPACE_CASES:
1.1       misho    2013:           OK = TRUE;
                   2014:           break;
                   2015: 
                   2016:           default:
                   2017:           OK = FALSE;
                   2018:           }
                   2019: 
                   2020:         if (OK == (d == OP_VSPACE))
                   2021:           {
                   2022:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
                   2023:             {
                   2024:             active_count--;           /* Remove non-match possibility */
                   2025:             next_active_state--;
                   2026:             }
1.1.1.4 ! misho    2027:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2028:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1.1       misho    2029:           else
                   2030:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   2031:           }
                   2032:         }
                   2033:       break;
                   2034: 
                   2035:       /*-----------------------------------------------------------------*/
                   2036:       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
                   2037:       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
                   2038:       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
                   2039:       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
                   2040:       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    2041:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    2042:       count = current_state->count;  /* Number already matched */
                   2043:       if (clen > 0)
                   2044:         {
                   2045:         BOOL OK;
                   2046:         switch (c)
                   2047:           {
1.1.1.4 ! misho    2048:           HSPACE_CASES:
1.1       misho    2049:           OK = TRUE;
                   2050:           break;
                   2051: 
                   2052:           default:
                   2053:           OK = FALSE;
                   2054:           break;
                   2055:           }
                   2056: 
                   2057:         if (OK == (d == OP_HSPACE))
                   2058:           {
                   2059:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
                   2060:             {
                   2061:             active_count--;           /* Remove non-match possibility */
                   2062:             next_active_state--;
                   2063:             }
1.1.1.4 ! misho    2064:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2065:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1.1       misho    2066:           else
                   2067:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   2068:           }
                   2069:         }
                   2070:       break;
                   2071: 
                   2072: /* ========================================================================== */
                   2073:       /* These opcodes are followed by a character that is usually compared
                   2074:       to the current subject character; it is loaded into d. We still get
                   2075:       here even if there is no subject character, because in some cases zero
                   2076:       repetitions are permitted. */
                   2077: 
                   2078:       /*-----------------------------------------------------------------*/
                   2079:       case OP_CHAR:
                   2080:       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2081:       break;
                   2082: 
                   2083:       /*-----------------------------------------------------------------*/
                   2084:       case OP_CHARI:
                   2085:       if (clen == 0) break;
                   2086: 
1.1.1.2   misho    2087: #ifdef SUPPORT_UTF
                   2088:       if (utf)
1.1       misho    2089:         {
                   2090:         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
                   2091:           {
                   2092:           unsigned int othercase;
1.1.1.2   misho    2093:           if (c < 128)
                   2094:             othercase = fcc[c];
                   2095:           else
                   2096:             /* If we have Unicode property support, we can use it to test the
                   2097:             other case of the character. */
1.1       misho    2098: #ifdef SUPPORT_UCP
1.1.1.2   misho    2099:             othercase = UCD_OTHERCASE(c);
1.1       misho    2100: #else
1.1.1.2   misho    2101:             othercase = NOTACHAR;
1.1       misho    2102: #endif
                   2103: 
                   2104:           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2105:           }
                   2106:         }
                   2107:       else
1.1.1.2   misho    2108: #endif  /* SUPPORT_UTF */
                   2109:       /* Not UTF mode */
1.1       misho    2110:         {
1.1.1.2   misho    2111:         if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
                   2112:           { ADD_NEW(state_offset + 2, 0); }
1.1       misho    2113:         }
                   2114:       break;
                   2115: 
                   2116: 
                   2117: #ifdef SUPPORT_UCP
                   2118:       /*-----------------------------------------------------------------*/
                   2119:       /* This is a tricky one because it can match more than one character.
                   2120:       Find out how many characters to skip, and then set up a negative state
                   2121:       to wait for them to pass before continuing. */
                   2122: 
                   2123:       case OP_EXTUNI:
1.1.1.4 ! misho    2124:       if (clen > 0)
1.1       misho    2125:         {
1.1.1.4 ! misho    2126:         int lgb, rgb;
1.1.1.2   misho    2127:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    2128:         int ncount = 0;
1.1.1.4 ! misho    2129:         lgb = UCD_GRAPHBREAK(c);
1.1       misho    2130:         while (nptr < end_subject)
                   2131:           {
1.1.1.4 ! misho    2132:           dlen = 1;
        !          2133:           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
        !          2134:           rgb = UCD_GRAPHBREAK(d);
        !          2135:           if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1.1       misho    2136:           ncount++;
1.1.1.4 ! misho    2137:           lgb = rgb;
        !          2138:           nptr += dlen;
1.1       misho    2139:           }
1.1.1.3   misho    2140:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                   2141:             reset_could_continue = TRUE;
1.1       misho    2142:         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
                   2143:         }
                   2144:       break;
                   2145: #endif
                   2146: 
                   2147:       /*-----------------------------------------------------------------*/
                   2148:       /* This is tricky, like EXTUNI, because it too can match more than one
                   2149:       character (when CR is followed by LF). In this case, set up a negative
                   2150:       state to wait for one character to pass before continuing. */
                   2151: 
                   2152:       case OP_ANYNL:
                   2153:       if (clen > 0) switch(c)
                   2154:         {
1.1.1.4 ! misho    2155:         case CHAR_VT:
        !          2156:         case CHAR_FF:
        !          2157:         case CHAR_NEL:
        !          2158: #ifndef EBCDIC
1.1       misho    2159:         case 0x2028:
                   2160:         case 0x2029:
1.1.1.4 ! misho    2161: #endif  /* Not EBCDIC */
1.1       misho    2162:         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   2163: 
1.1.1.4 ! misho    2164:         case CHAR_LF:
1.1       misho    2165:         ADD_NEW(state_offset + 1, 0);
                   2166:         break;
                   2167: 
1.1.1.4 ! misho    2168:         case CHAR_CR:
1.1.1.3   misho    2169:         if (ptr + 1 >= end_subject)
                   2170:           {
                   2171:           ADD_NEW(state_offset + 1, 0);
                   2172:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                   2173:             reset_could_continue = TRUE;
                   2174:           }
1.1.1.4 ! misho    2175:         else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
1.1       misho    2176:           {
                   2177:           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                   2178:           }
                   2179:         else
                   2180:           {
                   2181:           ADD_NEW(state_offset + 1, 0);
                   2182:           }
                   2183:         break;
                   2184:         }
                   2185:       break;
                   2186: 
                   2187:       /*-----------------------------------------------------------------*/
                   2188:       case OP_NOT_VSPACE:
                   2189:       if (clen > 0) switch(c)
                   2190:         {
1.1.1.4 ! misho    2191:         VSPACE_CASES:
1.1       misho    2192:         break;
                   2193: 
                   2194:         default:
                   2195:         ADD_NEW(state_offset + 1, 0);
                   2196:         break;
                   2197:         }
                   2198:       break;
                   2199: 
                   2200:       /*-----------------------------------------------------------------*/
                   2201:       case OP_VSPACE:
                   2202:       if (clen > 0) switch(c)
                   2203:         {
1.1.1.4 ! misho    2204:         VSPACE_CASES:
1.1       misho    2205:         ADD_NEW(state_offset + 1, 0);
                   2206:         break;
                   2207: 
1.1.1.4 ! misho    2208:         default:
        !          2209:         break;
1.1       misho    2210:         }
                   2211:       break;
                   2212: 
                   2213:       /*-----------------------------------------------------------------*/
                   2214:       case OP_NOT_HSPACE:
                   2215:       if (clen > 0) switch(c)
                   2216:         {
1.1.1.4 ! misho    2217:         HSPACE_CASES:
1.1       misho    2218:         break;
                   2219: 
                   2220:         default:
                   2221:         ADD_NEW(state_offset + 1, 0);
                   2222:         break;
                   2223:         }
                   2224:       break;
                   2225: 
                   2226:       /*-----------------------------------------------------------------*/
                   2227:       case OP_HSPACE:
                   2228:       if (clen > 0) switch(c)
                   2229:         {
1.1.1.4 ! misho    2230:         HSPACE_CASES:
1.1       misho    2231:         ADD_NEW(state_offset + 1, 0);
                   2232:         break;
1.1.1.4 ! misho    2233: 
        !          2234:         default:
        !          2235:         break;
1.1       misho    2236:         }
                   2237:       break;
                   2238: 
                   2239:       /*-----------------------------------------------------------------*/
1.1.1.3   misho    2240:       /* Match a negated single character casefully. */
1.1       misho    2241: 
                   2242:       case OP_NOT:
                   2243:       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2244:       break;
                   2245: 
                   2246:       /*-----------------------------------------------------------------*/
1.1.1.3   misho    2247:       /* Match a negated single character caselessly. */
1.1       misho    2248: 
                   2249:       case OP_NOTI:
1.1.1.3   misho    2250:       if (clen > 0)
                   2251:         {
                   2252:         unsigned int otherd;
                   2253: #ifdef SUPPORT_UTF
                   2254:         if (utf && d >= 128)
                   2255:           {
                   2256: #ifdef SUPPORT_UCP
                   2257:           otherd = UCD_OTHERCASE(d);
                   2258: #endif  /* SUPPORT_UCP */
                   2259:           }
                   2260:         else
                   2261: #endif  /* SUPPORT_UTF */
                   2262:         otherd = TABLE_GET(d, fcc, d);
                   2263:         if (c != d && c != otherd)
                   2264:           { ADD_NEW(state_offset + dlen + 1, 0); }
                   2265:         }
1.1       misho    2266:       break;
                   2267: 
                   2268:       /*-----------------------------------------------------------------*/
                   2269:       case OP_PLUSI:
                   2270:       case OP_MINPLUSI:
                   2271:       case OP_POSPLUSI:
                   2272:       case OP_NOTPLUSI:
                   2273:       case OP_NOTMINPLUSI:
                   2274:       case OP_NOTPOSPLUSI:
                   2275:       caseless = TRUE;
                   2276:       codevalue -= OP_STARI - OP_STAR;
                   2277: 
                   2278:       /* Fall through */
                   2279:       case OP_PLUS:
                   2280:       case OP_MINPLUS:
                   2281:       case OP_POSPLUS:
                   2282:       case OP_NOTPLUS:
                   2283:       case OP_NOTMINPLUS:
                   2284:       case OP_NOTPOSPLUS:
                   2285:       count = current_state->count;  /* Already matched */
                   2286:       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
                   2287:       if (clen > 0)
                   2288:         {
1.1.1.4 ! misho    2289:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2290:         if (caseless)
                   2291:           {
1.1.1.2   misho    2292: #ifdef SUPPORT_UTF
                   2293:           if (utf && d >= 128)
1.1       misho    2294:             {
                   2295: #ifdef SUPPORT_UCP
                   2296:             otherd = UCD_OTHERCASE(d);
                   2297: #endif  /* SUPPORT_UCP */
                   2298:             }
                   2299:           else
1.1.1.2   misho    2300: #endif  /* SUPPORT_UTF */
                   2301:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2302:           }
                   2303:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2304:           {
                   2305:           if (count > 0 &&
                   2306:               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
                   2307:             {
                   2308:             active_count--;             /* Remove non-match possibility */
                   2309:             next_active_state--;
                   2310:             }
                   2311:           count++;
                   2312:           ADD_NEW(state_offset, count);
                   2313:           }
                   2314:         }
                   2315:       break;
                   2316: 
                   2317:       /*-----------------------------------------------------------------*/
                   2318:       case OP_QUERYI:
                   2319:       case OP_MINQUERYI:
                   2320:       case OP_POSQUERYI:
                   2321:       case OP_NOTQUERYI:
                   2322:       case OP_NOTMINQUERYI:
                   2323:       case OP_NOTPOSQUERYI:
                   2324:       caseless = TRUE;
                   2325:       codevalue -= OP_STARI - OP_STAR;
                   2326:       /* Fall through */
                   2327:       case OP_QUERY:
                   2328:       case OP_MINQUERY:
                   2329:       case OP_POSQUERY:
                   2330:       case OP_NOTQUERY:
                   2331:       case OP_NOTMINQUERY:
                   2332:       case OP_NOTPOSQUERY:
                   2333:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2334:       if (clen > 0)
                   2335:         {
1.1.1.4 ! misho    2336:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2337:         if (caseless)
                   2338:           {
1.1.1.2   misho    2339: #ifdef SUPPORT_UTF
                   2340:           if (utf && d >= 128)
1.1       misho    2341:             {
                   2342: #ifdef SUPPORT_UCP
                   2343:             otherd = UCD_OTHERCASE(d);
                   2344: #endif  /* SUPPORT_UCP */
                   2345:             }
                   2346:           else
1.1.1.2   misho    2347: #endif  /* SUPPORT_UTF */
                   2348:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2349:           }
                   2350:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2351:           {
                   2352:           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
                   2353:             {
                   2354:             active_count--;            /* Remove non-match possibility */
                   2355:             next_active_state--;
                   2356:             }
                   2357:           ADD_NEW(state_offset + dlen + 1, 0);
                   2358:           }
                   2359:         }
                   2360:       break;
                   2361: 
                   2362:       /*-----------------------------------------------------------------*/
                   2363:       case OP_STARI:
                   2364:       case OP_MINSTARI:
                   2365:       case OP_POSSTARI:
                   2366:       case OP_NOTSTARI:
                   2367:       case OP_NOTMINSTARI:
                   2368:       case OP_NOTPOSSTARI:
                   2369:       caseless = TRUE;
                   2370:       codevalue -= OP_STARI - OP_STAR;
                   2371:       /* Fall through */
                   2372:       case OP_STAR:
                   2373:       case OP_MINSTAR:
                   2374:       case OP_POSSTAR:
                   2375:       case OP_NOTSTAR:
                   2376:       case OP_NOTMINSTAR:
                   2377:       case OP_NOTPOSSTAR:
                   2378:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2379:       if (clen > 0)
                   2380:         {
1.1.1.4 ! misho    2381:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2382:         if (caseless)
                   2383:           {
1.1.1.2   misho    2384: #ifdef SUPPORT_UTF
                   2385:           if (utf && d >= 128)
1.1       misho    2386:             {
                   2387: #ifdef SUPPORT_UCP
                   2388:             otherd = UCD_OTHERCASE(d);
                   2389: #endif  /* SUPPORT_UCP */
                   2390:             }
                   2391:           else
1.1.1.2   misho    2392: #endif  /* SUPPORT_UTF */
                   2393:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2394:           }
                   2395:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2396:           {
                   2397:           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
                   2398:             {
                   2399:             active_count--;            /* Remove non-match possibility */
                   2400:             next_active_state--;
                   2401:             }
                   2402:           ADD_NEW(state_offset, 0);
                   2403:           }
                   2404:         }
                   2405:       break;
                   2406: 
                   2407:       /*-----------------------------------------------------------------*/
                   2408:       case OP_EXACTI:
                   2409:       case OP_NOTEXACTI:
                   2410:       caseless = TRUE;
                   2411:       codevalue -= OP_STARI - OP_STAR;
                   2412:       /* Fall through */
                   2413:       case OP_EXACT:
                   2414:       case OP_NOTEXACT:
                   2415:       count = current_state->count;  /* Number already matched */
                   2416:       if (clen > 0)
                   2417:         {
1.1.1.4 ! misho    2418:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2419:         if (caseless)
                   2420:           {
1.1.1.2   misho    2421: #ifdef SUPPORT_UTF
                   2422:           if (utf && d >= 128)
1.1       misho    2423:             {
                   2424: #ifdef SUPPORT_UCP
                   2425:             otherd = UCD_OTHERCASE(d);
                   2426: #endif  /* SUPPORT_UCP */
                   2427:             }
                   2428:           else
1.1.1.2   misho    2429: #endif  /* SUPPORT_UTF */
                   2430:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2431:           }
                   2432:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2433:           {
1.1.1.4 ! misho    2434:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2435:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
1.1       misho    2436:           else
                   2437:             { ADD_NEW(state_offset, count); }
                   2438:           }
                   2439:         }
                   2440:       break;
                   2441: 
                   2442:       /*-----------------------------------------------------------------*/
                   2443:       case OP_UPTOI:
                   2444:       case OP_MINUPTOI:
                   2445:       case OP_POSUPTOI:
                   2446:       case OP_NOTUPTOI:
                   2447:       case OP_NOTMINUPTOI:
                   2448:       case OP_NOTPOSUPTOI:
                   2449:       caseless = TRUE;
                   2450:       codevalue -= OP_STARI - OP_STAR;
                   2451:       /* Fall through */
                   2452:       case OP_UPTO:
                   2453:       case OP_MINUPTO:
                   2454:       case OP_POSUPTO:
                   2455:       case OP_NOTUPTO:
                   2456:       case OP_NOTMINUPTO:
                   2457:       case OP_NOTPOSUPTO:
1.1.1.2   misho    2458:       ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
1.1       misho    2459:       count = current_state->count;  /* Number already matched */
                   2460:       if (clen > 0)
                   2461:         {
1.1.1.4 ! misho    2462:         pcre_uint32 otherd = NOTACHAR;
1.1       misho    2463:         if (caseless)
                   2464:           {
1.1.1.2   misho    2465: #ifdef SUPPORT_UTF
                   2466:           if (utf && d >= 128)
1.1       misho    2467:             {
                   2468: #ifdef SUPPORT_UCP
                   2469:             otherd = UCD_OTHERCASE(d);
                   2470: #endif  /* SUPPORT_UCP */
                   2471:             }
                   2472:           else
1.1.1.2   misho    2473: #endif  /* SUPPORT_UTF */
                   2474:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2475:           }
                   2476:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2477:           {
                   2478:           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
                   2479:             {
                   2480:             active_count--;             /* Remove non-match possibility */
                   2481:             next_active_state--;
                   2482:             }
1.1.1.4 ! misho    2483:           if (++count >= (int)GET2(code, 1))
1.1.1.2   misho    2484:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
1.1       misho    2485:           else
                   2486:             { ADD_NEW(state_offset, count); }
                   2487:           }
                   2488:         }
                   2489:       break;
                   2490: 
                   2491: 
                   2492: /* ========================================================================== */
                   2493:       /* These are the class-handling opcodes */
                   2494: 
                   2495:       case OP_CLASS:
                   2496:       case OP_NCLASS:
                   2497:       case OP_XCLASS:
                   2498:         {
                   2499:         BOOL isinclass = FALSE;
                   2500:         int next_state_offset;
1.1.1.2   misho    2501:         const pcre_uchar *ecode;
1.1       misho    2502: 
                   2503:         /* For a simple class, there is always just a 32-byte table, and we
                   2504:         can set isinclass from it. */
                   2505: 
                   2506:         if (codevalue != OP_XCLASS)
                   2507:           {
1.1.1.2   misho    2508:           ecode = code + 1 + (32 / sizeof(pcre_uchar));
1.1       misho    2509:           if (clen > 0)
                   2510:             {
                   2511:             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
1.1.1.2   misho    2512:               ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
1.1       misho    2513:             }
                   2514:           }
                   2515: 
                   2516:         /* An extended class may have a table or a list of single characters,
                   2517:         ranges, or both, and it may be positive or negative. There's a
                   2518:         function that sorts all this out. */
                   2519: 
                   2520:         else
                   2521:          {
                   2522:          ecode = code + GET(code, 1);
1.1.1.2   misho    2523:          if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
1.1       misho    2524:          }
                   2525: 
                   2526:         /* At this point, isinclass is set for all kinds of class, and ecode
                   2527:         points to the byte after the end of the class. If there is a
                   2528:         quantifier, this is where it will be. */
                   2529: 
                   2530:         next_state_offset = (int)(ecode - start_code);
                   2531: 
                   2532:         switch (*ecode)
                   2533:           {
                   2534:           case OP_CRSTAR:
                   2535:           case OP_CRMINSTAR:
                   2536:           ADD_ACTIVE(next_state_offset + 1, 0);
                   2537:           if (isinclass) { ADD_NEW(state_offset, 0); }
                   2538:           break;
                   2539: 
                   2540:           case OP_CRPLUS:
                   2541:           case OP_CRMINPLUS:
                   2542:           count = current_state->count;  /* Already matched */
                   2543:           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
                   2544:           if (isinclass) { count++; ADD_NEW(state_offset, count); }
                   2545:           break;
                   2546: 
                   2547:           case OP_CRQUERY:
                   2548:           case OP_CRMINQUERY:
                   2549:           ADD_ACTIVE(next_state_offset + 1, 0);
                   2550:           if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
                   2551:           break;
                   2552: 
                   2553:           case OP_CRRANGE:
                   2554:           case OP_CRMINRANGE:
                   2555:           count = current_state->count;  /* Already matched */
1.1.1.4 ! misho    2556:           if (count >= (int)GET2(ecode, 1))
1.1.1.2   misho    2557:             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
1.1       misho    2558:           if (isinclass)
                   2559:             {
1.1.1.4 ! misho    2560:             int max = (int)GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2561:             if (++count >= max && max != 0)   /* Max 0 => no limit */
1.1.1.2   misho    2562:               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
1.1       misho    2563:             else
                   2564:               { ADD_NEW(state_offset, count); }
                   2565:             }
                   2566:           break;
                   2567: 
                   2568:           default:
                   2569:           if (isinclass) { ADD_NEW(next_state_offset, 0); }
                   2570:           break;
                   2571:           }
                   2572:         }
                   2573:       break;
                   2574: 
                   2575: /* ========================================================================== */
                   2576:       /* These are the opcodes for fancy brackets of various kinds. We have
                   2577:       to use recursion in order to handle them. The "always failing" assertion
                   2578:       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
                   2579:       though the other "backtracking verbs" are not supported. */
                   2580: 
                   2581:       case OP_FAIL:
                   2582:       forced_fail++;    /* Count FAILs for multiple states */
                   2583:       break;
                   2584: 
                   2585:       case OP_ASSERT:
                   2586:       case OP_ASSERT_NOT:
                   2587:       case OP_ASSERTBACK:
                   2588:       case OP_ASSERTBACK_NOT:
                   2589:         {
                   2590:         int rc;
                   2591:         int local_offsets[2];
                   2592:         int local_workspace[1000];
1.1.1.2   misho    2593:         const pcre_uchar *endasscode = code + GET(code, 1);
1.1       misho    2594: 
                   2595:         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2596: 
                   2597:         rc = internal_dfa_exec(
                   2598:           md,                                   /* static match data */
                   2599:           code,                                 /* this subexpression's code */
                   2600:           ptr,                                  /* where we currently are */
                   2601:           (int)(ptr - start_subject),           /* start offset */
                   2602:           local_offsets,                        /* offset vector */
                   2603:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2604:           local_workspace,                      /* workspace vector */
                   2605:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2606:           rlevel);                              /* function recursion level */
                   2607: 
                   2608:         if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2609:         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
                   2610:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2611:         }
                   2612:       break;
                   2613: 
                   2614:       /*-----------------------------------------------------------------*/
                   2615:       case OP_COND:
                   2616:       case OP_SCOND:
                   2617:         {
                   2618:         int local_offsets[1000];
                   2619:         int local_workspace[1000];
                   2620:         int codelink = GET(code, 1);
                   2621:         int condcode;
                   2622: 
                   2623:         /* Because of the way auto-callout works during compile, a callout item
                   2624:         is inserted between OP_COND and an assertion condition. This does not
                   2625:         happen for the other conditions. */
                   2626: 
                   2627:         if (code[LINK_SIZE+1] == OP_CALLOUT)
                   2628:           {
                   2629:           rrc = 0;
1.1.1.2   misho    2630:           if (PUBL(callout) != NULL)
1.1       misho    2631:             {
1.1.1.2   misho    2632:             PUBL(callout_block) cb;
1.1       misho    2633:             cb.version          = 1;   /* Version 1 of the callout block */
                   2634:             cb.callout_number   = code[LINK_SIZE+2];
                   2635:             cb.offset_vector    = offsets;
1.1.1.4 ! misho    2636: #if defined COMPILE_PCRE8
1.1       misho    2637:             cb.subject          = (PCRE_SPTR)start_subject;
1.1.1.4 ! misho    2638: #elif defined COMPILE_PCRE16
1.1.1.2   misho    2639:             cb.subject          = (PCRE_SPTR16)start_subject;
1.1.1.4 ! misho    2640: #elif defined COMPILE_PCRE32
        !          2641:             cb.subject          = (PCRE_SPTR32)start_subject;
1.1.1.2   misho    2642: #endif
1.1       misho    2643:             cb.subject_length   = (int)(end_subject - start_subject);
                   2644:             cb.start_match      = (int)(current_subject - start_subject);
                   2645:             cb.current_position = (int)(ptr - start_subject);
                   2646:             cb.pattern_position = GET(code, LINK_SIZE + 3);
                   2647:             cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
                   2648:             cb.capture_top      = 1;
                   2649:             cb.capture_last     = -1;
                   2650:             cb.callout_data     = md->callout_data;
                   2651:             cb.mark             = NULL;   /* No (*MARK) support */
1.1.1.2   misho    2652:             if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
1.1       misho    2653:             }
                   2654:           if (rrc > 0) break;                      /* Fail this thread */
1.1.1.2   misho    2655:           code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
1.1       misho    2656:           }
                   2657: 
                   2658:         condcode = code[LINK_SIZE+1];
                   2659: 
                   2660:         /* Back reference conditions are not supported */
                   2661: 
                   2662:         if (condcode == OP_CREF || condcode == OP_NCREF)
                   2663:           return PCRE_ERROR_DFA_UCOND;
                   2664: 
                   2665:         /* The DEFINE condition is always false */
                   2666: 
                   2667:         if (condcode == OP_DEF)
                   2668:           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2669: 
                   2670:         /* The only supported version of OP_RREF is for the value RREF_ANY,
                   2671:         which means "test if in any recursion". We can't test for specifically
                   2672:         recursed groups. */
                   2673: 
                   2674:         else if (condcode == OP_RREF || condcode == OP_NRREF)
                   2675:           {
1.1.1.2   misho    2676:           int value = GET2(code, LINK_SIZE + 2);
1.1       misho    2677:           if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
                   2678:           if (md->recursive != NULL)
1.1.1.2   misho    2679:             { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
1.1       misho    2680:           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2681:           }
                   2682: 
                   2683:         /* Otherwise, the condition is an assertion */
                   2684: 
                   2685:         else
                   2686:           {
                   2687:           int rc;
1.1.1.2   misho    2688:           const pcre_uchar *asscode = code + LINK_SIZE + 1;
                   2689:           const pcre_uchar *endasscode = asscode + GET(asscode, 1);
1.1       misho    2690: 
                   2691:           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2692: 
                   2693:           rc = internal_dfa_exec(
                   2694:             md,                                   /* fixed match data */
                   2695:             asscode,                              /* this subexpression's code */
                   2696:             ptr,                                  /* where we currently are */
                   2697:             (int)(ptr - start_subject),           /* start offset */
                   2698:             local_offsets,                        /* offset vector */
                   2699:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2700:             local_workspace,                      /* workspace vector */
                   2701:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2702:             rlevel);                              /* function recursion level */
                   2703: 
                   2704:           if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2705:           if ((rc >= 0) ==
                   2706:                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
                   2707:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2708:           else
                   2709:             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2710:           }
                   2711:         }
                   2712:       break;
                   2713: 
                   2714:       /*-----------------------------------------------------------------*/
                   2715:       case OP_RECURSE:
                   2716:         {
                   2717:         dfa_recursion_info *ri;
                   2718:         int local_offsets[1000];
                   2719:         int local_workspace[1000];
1.1.1.2   misho    2720:         const pcre_uchar *callpat = start_code + GET(code, 1);
1.1       misho    2721:         int recno = (callpat == md->start_code)? 0 :
                   2722:           GET2(callpat, 1 + LINK_SIZE);
                   2723:         int rc;
                   2724: 
                   2725:         DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
                   2726: 
                   2727:         /* Check for repeating a recursion without advancing the subject
                   2728:         pointer. This should catch convoluted mutual recursions. (Some simple
                   2729:         cases are caught at compile time.) */
                   2730: 
                   2731:         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   2732:           if (recno == ri->group_num && ptr == ri->subject_position)
                   2733:             return PCRE_ERROR_RECURSELOOP;
                   2734: 
                   2735:         /* Remember this recursion and where we started it so as to
                   2736:         catch infinite loops. */
                   2737: 
                   2738:         new_recursive.group_num = recno;
                   2739:         new_recursive.subject_position = ptr;
                   2740:         new_recursive.prevrec = md->recursive;
                   2741:         md->recursive = &new_recursive;
                   2742: 
                   2743:         rc = internal_dfa_exec(
                   2744:           md,                                   /* fixed match data */
                   2745:           callpat,                              /* this subexpression's code */
                   2746:           ptr,                                  /* where we currently are */
                   2747:           (int)(ptr - start_subject),           /* start offset */
                   2748:           local_offsets,                        /* offset vector */
                   2749:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2750:           local_workspace,                      /* workspace vector */
                   2751:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2752:           rlevel);                              /* function recursion level */
                   2753: 
                   2754:         md->recursive = new_recursive.prevrec;  /* Done this recursion */
                   2755: 
                   2756:         DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
                   2757:           rc));
                   2758: 
                   2759:         /* Ran out of internal offsets */
                   2760: 
                   2761:         if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
                   2762: 
                   2763:         /* For each successful matched substring, set up the next state with a
                   2764:         count of characters to skip before trying it. Note that the count is in
                   2765:         characters, not bytes. */
                   2766: 
                   2767:         if (rc > 0)
                   2768:           {
                   2769:           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
                   2770:             {
                   2771:             int charcount = local_offsets[rc+1] - local_offsets[rc];
1.1.1.4 ! misho    2772: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.3   misho    2773:             if (utf)
                   2774:               {
                   2775:               const pcre_uchar *p = start_subject + local_offsets[rc];
                   2776:               const pcre_uchar *pp = start_subject + local_offsets[rc+1];
                   2777:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
                   2778:               }
1.1.1.2   misho    2779: #endif
1.1       misho    2780:             if (charcount > 0)
                   2781:               {
                   2782:               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
                   2783:               }
                   2784:             else
                   2785:               {
                   2786:               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
                   2787:               }
                   2788:             }
                   2789:           }
                   2790:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2791:         }
                   2792:       break;
                   2793: 
                   2794:       /*-----------------------------------------------------------------*/
                   2795:       case OP_BRAPOS:
                   2796:       case OP_SBRAPOS:
                   2797:       case OP_CBRAPOS:
                   2798:       case OP_SCBRAPOS:
                   2799:       case OP_BRAPOSZERO:
                   2800:         {
                   2801:         int charcount, matched_count;
1.1.1.2   misho    2802:         const pcre_uchar *local_ptr = ptr;
1.1       misho    2803:         BOOL allow_zero;
                   2804: 
                   2805:         if (codevalue == OP_BRAPOSZERO)
                   2806:           {
                   2807:           allow_zero = TRUE;
                   2808:           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
                   2809:           }
                   2810:         else allow_zero = FALSE;
                   2811: 
                   2812:         /* Loop to match the subpattern as many times as possible as if it were
                   2813:         a complete pattern. */
                   2814: 
                   2815:         for (matched_count = 0;; matched_count++)
                   2816:           {
                   2817:           int local_offsets[2];
                   2818:           int local_workspace[1000];
                   2819: 
                   2820:           int rc = internal_dfa_exec(
                   2821:             md,                                   /* fixed match data */
                   2822:             code,                                 /* this subexpression's code */
                   2823:             local_ptr,                            /* where we currently are */
                   2824:             (int)(ptr - start_subject),           /* start offset */
                   2825:             local_offsets,                        /* offset vector */
                   2826:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2827:             local_workspace,                      /* workspace vector */
                   2828:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2829:             rlevel);                              /* function recursion level */
                   2830: 
                   2831:           /* Failed to match */
                   2832: 
                   2833:           if (rc < 0)
                   2834:             {
                   2835:             if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2836:             break;
                   2837:             }
                   2838: 
                   2839:           /* Matched: break the loop if zero characters matched. */
                   2840: 
                   2841:           charcount = local_offsets[1] - local_offsets[0];
                   2842:           if (charcount == 0) break;
                   2843:           local_ptr += charcount;    /* Advance temporary position ptr */
                   2844:           }
                   2845: 
                   2846:         /* At this point we have matched the subpattern matched_count
                   2847:         times, and local_ptr is pointing to the character after the end of the
                   2848:         last match. */
                   2849: 
                   2850:         if (matched_count > 0 || allow_zero)
                   2851:           {
1.1.1.2   misho    2852:           const pcre_uchar *end_subpattern = code;
1.1       misho    2853:           int next_state_offset;
                   2854: 
                   2855:           do { end_subpattern += GET(end_subpattern, 1); }
                   2856:             while (*end_subpattern == OP_ALT);
                   2857:           next_state_offset =
                   2858:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2859: 
                   2860:           /* Optimization: if there are no more active states, and there
                   2861:           are no new states yet set up, then skip over the subject string
                   2862:           right here, to save looping. Otherwise, set up the new state to swing
                   2863:           into action when the end of the matched substring is reached. */
                   2864: 
                   2865:           if (i + 1 >= active_count && new_count == 0)
                   2866:             {
                   2867:             ptr = local_ptr;
                   2868:             clen = 0;
                   2869:             ADD_NEW(next_state_offset, 0);
                   2870:             }
                   2871:           else
                   2872:             {
1.1.1.2   misho    2873:             const pcre_uchar *p = ptr;
                   2874:             const pcre_uchar *pp = local_ptr;
1.1       misho    2875:             charcount = (int)(pp - p);
1.1.1.4 ! misho    2876: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.3   misho    2877:             if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
1.1.1.2   misho    2878: #endif
1.1       misho    2879:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2880:             }
                   2881:           }
                   2882:         }
                   2883:       break;
                   2884: 
                   2885:       /*-----------------------------------------------------------------*/
                   2886:       case OP_ONCE:
                   2887:       case OP_ONCE_NC:
                   2888:         {
                   2889:         int local_offsets[2];
                   2890:         int local_workspace[1000];
                   2891: 
                   2892:         int rc = internal_dfa_exec(
                   2893:           md,                                   /* fixed match data */
                   2894:           code,                                 /* this subexpression's code */
                   2895:           ptr,                                  /* where we currently are */
                   2896:           (int)(ptr - start_subject),           /* start offset */
                   2897:           local_offsets,                        /* offset vector */
                   2898:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2899:           local_workspace,                      /* workspace vector */
                   2900:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2901:           rlevel);                              /* function recursion level */
                   2902: 
                   2903:         if (rc >= 0)
                   2904:           {
1.1.1.2   misho    2905:           const pcre_uchar *end_subpattern = code;
1.1       misho    2906:           int charcount = local_offsets[1] - local_offsets[0];
                   2907:           int next_state_offset, repeat_state_offset;
                   2908: 
                   2909:           do { end_subpattern += GET(end_subpattern, 1); }
                   2910:             while (*end_subpattern == OP_ALT);
                   2911:           next_state_offset =
                   2912:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2913: 
                   2914:           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
                   2915:           arrange for the repeat state also to be added to the relevant list.
                   2916:           Calculate the offset, or set -1 for no repeat. */
                   2917: 
                   2918:           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
                   2919:                                  *end_subpattern == OP_KETRMIN)?
                   2920:             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
                   2921: 
                   2922:           /* If we have matched an empty string, add the next state at the
                   2923:           current character pointer. This is important so that the duplicate
                   2924:           checking kicks in, which is what breaks infinite loops that match an
                   2925:           empty string. */
                   2926: 
                   2927:           if (charcount == 0)
                   2928:             {
                   2929:             ADD_ACTIVE(next_state_offset, 0);
                   2930:             }
                   2931: 
                   2932:           /* Optimization: if there are no more active states, and there
                   2933:           are no new states yet set up, then skip over the subject string
                   2934:           right here, to save looping. Otherwise, set up the new state to swing
                   2935:           into action when the end of the matched substring is reached. */
                   2936: 
                   2937:           else if (i + 1 >= active_count && new_count == 0)
                   2938:             {
                   2939:             ptr += charcount;
                   2940:             clen = 0;
                   2941:             ADD_NEW(next_state_offset, 0);
                   2942: 
                   2943:             /* If we are adding a repeat state at the new character position,
                   2944:             we must fudge things so that it is the only current state.
                   2945:             Otherwise, it might be a duplicate of one we processed before, and
                   2946:             that would cause it to be skipped. */
                   2947: 
                   2948:             if (repeat_state_offset >= 0)
                   2949:               {
                   2950:               next_active_state = active_states;
                   2951:               active_count = 0;
                   2952:               i = -1;
                   2953:               ADD_ACTIVE(repeat_state_offset, 0);
                   2954:               }
                   2955:             }
                   2956:           else
                   2957:             {
1.1.1.4 ! misho    2958: #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1.1.1.3   misho    2959:             if (utf)
                   2960:               {
                   2961:               const pcre_uchar *p = start_subject + local_offsets[0];
                   2962:               const pcre_uchar *pp = start_subject + local_offsets[1];
                   2963:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
                   2964:               }
1.1.1.2   misho    2965: #endif
1.1       misho    2966:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2967:             if (repeat_state_offset >= 0)
                   2968:               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
                   2969:             }
                   2970:           }
                   2971:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2972:         }
                   2973:       break;
                   2974: 
                   2975: 
                   2976: /* ========================================================================== */
                   2977:       /* Handle callouts */
                   2978: 
                   2979:       case OP_CALLOUT:
                   2980:       rrc = 0;
1.1.1.2   misho    2981:       if (PUBL(callout) != NULL)
1.1       misho    2982:         {
1.1.1.2   misho    2983:         PUBL(callout_block) cb;
1.1       misho    2984:         cb.version          = 1;   /* Version 1 of the callout block */
                   2985:         cb.callout_number   = code[1];
                   2986:         cb.offset_vector    = offsets;
1.1.1.4 ! misho    2987: #if defined COMPILE_PCRE8
1.1       misho    2988:         cb.subject          = (PCRE_SPTR)start_subject;
1.1.1.4 ! misho    2989: #elif defined COMPILE_PCRE16
1.1.1.2   misho    2990:         cb.subject          = (PCRE_SPTR16)start_subject;
1.1.1.4 ! misho    2991: #elif defined COMPILE_PCRE32
        !          2992:         cb.subject          = (PCRE_SPTR32)start_subject;
1.1.1.2   misho    2993: #endif
1.1       misho    2994:         cb.subject_length   = (int)(end_subject - start_subject);
                   2995:         cb.start_match      = (int)(current_subject - start_subject);
                   2996:         cb.current_position = (int)(ptr - start_subject);
                   2997:         cb.pattern_position = GET(code, 2);
                   2998:         cb.next_item_length = GET(code, 2 + LINK_SIZE);
                   2999:         cb.capture_top      = 1;
                   3000:         cb.capture_last     = -1;
                   3001:         cb.callout_data     = md->callout_data;
                   3002:         cb.mark             = NULL;   /* No (*MARK) support */
1.1.1.2   misho    3003:         if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
1.1       misho    3004:         }
                   3005:       if (rrc == 0)
1.1.1.2   misho    3006:         { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
1.1       misho    3007:       break;
                   3008: 
                   3009: 
                   3010: /* ========================================================================== */
                   3011:       default:        /* Unsupported opcode */
                   3012:       return PCRE_ERROR_DFA_UITEM;
                   3013:       }
                   3014: 
                   3015:     NEXT_ACTIVE_STATE: continue;
                   3016: 
                   3017:     }      /* End of loop scanning active states */
                   3018: 
                   3019:   /* We have finished the processing at the current subject character. If no
                   3020:   new states have been set for the next character, we have found all the
                   3021:   matches that we are going to find. If we are at the top level and partial
                   3022:   matching has been requested, check for appropriate conditions.
                   3023: 
                   3024:   The "forced_fail" variable counts the number of (*F) encountered for the
                   3025:   character. If it is equal to the original active_count (saved in
                   3026:   workspace[1]) it means that (*F) was found on every active state. In this
                   3027:   case we don't want to give a partial match.
                   3028: 
                   3029:   The "could_continue" variable is true if a state could have continued but
                   3030:   for the fact that the end of the subject was reached. */
                   3031: 
                   3032:   if (new_count <= 0)
                   3033:     {
                   3034:     if (rlevel == 1 &&                               /* Top level, and */
1.1.1.3   misho    3035:         could_continue &&                            /* Some could go on, and */
1.1       misho    3036:         forced_fail != workspace[1] &&               /* Not all forced fail & */
                   3037:         (                                            /* either... */
                   3038:         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
                   3039:         ||                                           /* or... */
                   3040:         ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
                   3041:          match_count < 0)                            /* no matches */
                   3042:         ) &&                                         /* And... */
1.1.1.3   misho    3043:         (
                   3044:         partial_newline ||                           /* Either partial NL */
                   3045:           (                                          /* or ... */
                   3046:           ptr >= end_subject &&                /* End of subject and */
                   3047:           ptr > md->start_used_ptr)            /* Inspected non-empty string */
                   3048:           )
                   3049:         )
1.1       misho    3050:       match_count = PCRE_ERROR_PARTIAL;
                   3051:     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                   3052:       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
                   3053:       rlevel*2-2, SP));
                   3054:     break;        /* In effect, "return", but see the comment below */
                   3055:     }
                   3056: 
                   3057:   /* One or more states are active for the next character. */
                   3058: 
                   3059:   ptr += clen;    /* Advance to next subject character */
                   3060:   }               /* Loop to move along the subject string */
                   3061: 
                   3062: /* Control gets here from "break" a few lines above. We do it this way because
                   3063: if we use "return" above, we have compiler trouble. Some compilers warn if
                   3064: there's nothing here because they think the function doesn't return a value. On
                   3065: the other hand, if we put a dummy statement here, some more clever compilers
                   3066: complain that it can't be reached. Sigh. */
                   3067: 
                   3068: return match_count;
                   3069: }
                   3070: 
                   3071: 
                   3072: 
                   3073: 
                   3074: /*************************************************
                   3075: *    Execute a Regular Expression - DFA engine   *
                   3076: *************************************************/
                   3077: 
                   3078: /* This external function applies a compiled re to a subject string using a DFA
                   3079: engine. This function calls the internal function multiple times if the pattern
                   3080: is not anchored.
                   3081: 
                   3082: Arguments:
                   3083:   argument_re     points to the compiled expression
                   3084:   extra_data      points to extra data or is NULL
                   3085:   subject         points to the subject string
                   3086:   length          length of subject string in code units (the string may
                   3086:                     contain binary zeros)
                   3087:   start_offset    where to start in the subject string
                   3088:   options         option bits
                   3089:   offsets         vector of match offsets
                   3090:   offsetcount     size of same
                   3091:   workspace       workspace vector
                   3092:   wscount         size of same
                   3093: 
                   3094: Returns:          > 0 => number of match offset pairs placed in offsets
                   3095:                   = 0 => offsets overflowed; longest matches are present
                   3096:                    -1 => failed to match
                   3097:                  < -1 => some kind of unexpected problem
                   3098: */
                   3099: 
1.1.1.4 ! misho    3100: #if defined COMPILE_PCRE8
1.1       misho    3101: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3102: pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   3103:   const char *subject, int length, int start_offset, int options, int *offsets,
                   3104:   int offsetcount, int *workspace, int wscount)
1.1.1.4 ! misho    3105: #elif defined COMPILE_PCRE16
1.1.1.2   misho    3106: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3107: pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
                   3108:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
                   3109:   int offsetcount, int *workspace, int wscount)
1.1.1.4 ! misho    3110: #elif defined COMPILE_PCRE32
        !          3111: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !          3112: pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
        !          3113:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
        !          3114:   int offsetcount, int *workspace, int wscount)
1.1.1.2   misho    3115: #endif
1.1       misho    3116: {
1.1.1.2   misho    3117: REAL_PCRE *re = (REAL_PCRE *)argument_re;
1.1       misho    3118: dfa_match_data match_block;
                   3119: dfa_match_data *md = &match_block;
1.1.1.2   misho    3120: BOOL utf, anchored, startline, firstline;
                   3121: const pcre_uchar *current_subject, *end_subject;
1.1       misho    3122: const pcre_study_data *study = NULL;
                   3123: 
1.1.1.2   misho    3124: const pcre_uchar *req_char_ptr;
                   3125: const pcre_uint8 *start_bits = NULL;
                   3126: BOOL has_first_char = FALSE;
                   3127: BOOL has_req_char = FALSE;
                   3128: pcre_uchar first_char = 0;
                   3129: pcre_uchar first_char2 = 0;
                   3130: pcre_uchar req_char = 0;
                   3131: pcre_uchar req_char2 = 0;
1.1       misho    3132: int newline;
                   3133: 
                   3134: /* Plausibility checks */
                   3135: 
                   3136: if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   3137: if (re == NULL || subject == NULL || workspace == NULL ||
                   3138:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   3139: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   3140: if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
1.1.1.4 ! misho    3141: if (length < 0) return PCRE_ERROR_BADLENGTH;
1.1       misho    3142: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   3143: 
1.1.1.3   misho    3144: /* Check that the first field in the block is the magic number. If it is not,
                   3145: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
                   3146: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
                   3147: means that the pattern is likely compiled with different endianness. */
                   3148: 
                   3149: if (re->magic_number != MAGIC_NUMBER)
                   3150:   return re->magic_number == REVERSED_MAGIC_NUMBER?
                   3151:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
                   3152: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
                   3153: 
                   3154: /* If restarting after a partial match, do some sanity checks on the contents
                   3155: of the workspace. */
                   3156: 
                   3157: if ((options & PCRE_DFA_RESTART) != 0)
                   3158:   {
                   3159:   if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
                   3160:     workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
                   3161:       return PCRE_ERROR_DFA_BADRESTART;
                   3162:   }
                   3163: 
                   3164: /* Set up study, callout, and table data */
1.1       misho    3165: 
                   3166: md->tables = re->tables;
                   3167: md->callout_data = NULL;
                   3168: 
                   3169: if (extra_data != NULL)
                   3170:   {
                   3171:   unsigned int flags = extra_data->flags;
                   3172:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   3173:     study = (const pcre_study_data *)extra_data->study_data;
                   3174:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
                   3175:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   3176:     return PCRE_ERROR_DFA_UMLIMIT;
                   3177:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   3178:     md->callout_data = extra_data->callout_data;
                   3179:   if ((flags & PCRE_EXTRA_TABLES) != 0)
                   3180:     md->tables = extra_data->tables;
                   3181:   }
                   3182: 
                   3183: /* Set some local values */
                   3184: 
1.1.1.2   misho    3185: current_subject = (const pcre_uchar *)subject + start_offset;
                   3186: end_subject = (const pcre_uchar *)subject + length;
                   3187: req_char_ptr = current_subject - 1;
                   3188: 
                   3189: #ifdef SUPPORT_UTF
1.1.1.4 ! misho    3190: /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
1.1.1.2   misho    3191: utf = (re->options & PCRE_UTF8) != 0;
1.1       misho    3192: #else
1.1.1.2   misho    3193: utf = FALSE;
1.1       misho    3194: #endif
                   3195: 
                   3196: anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
                   3197:   (re->options & PCRE_ANCHORED) != 0;
                   3198: 
                   3199: /* The remaining fixed data for passing around. */
                   3200: 
1.1.1.2   misho    3201: md->start_code = (const pcre_uchar *)argument_re +
1.1       misho    3202:     re->name_table_offset + re->name_count * re->name_entry_size;
1.1.1.2   misho    3203: md->start_subject = (const pcre_uchar *)subject;
1.1       misho    3204: md->end_subject = end_subject;
                   3205: md->start_offset = start_offset;
                   3206: md->moptions = options;
                   3207: md->poptions = re->options;
                   3208: 
                   3209: /* If the BSR option is not set at match time, copy what was set
                   3210: at compile time. */
                   3211: 
                   3212: if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
                   3213:   {
                   3214:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   3215:     md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
                   3216: #ifdef BSR_ANYCRLF
                   3217:   else md->moptions |= PCRE_BSR_ANYCRLF;
                   3218: #endif
                   3219:   }
                   3220: 
                   3221: /* Handle different types of newline. The three bits give eight cases. If
                   3222: nothing is set at run time, whatever was used at compile time applies. */
                   3223: 
                   3224: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
                   3225:          PCRE_NEWLINE_BITS)
                   3226:   {
                   3227:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   3228:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   3229:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   3230:   case PCRE_NEWLINE_CR+
                   3231:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   3232:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   3233:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   3234:   default: return PCRE_ERROR_BADNEWLINE;
                   3235:   }
                   3236: 
                   3237: if (newline == -2)
                   3238:   {
                   3239:   md->nltype = NLTYPE_ANYCRLF;
                   3240:   }
                   3241: else if (newline < 0)
                   3242:   {
                   3243:   md->nltype = NLTYPE_ANY;
                   3244:   }
                   3245: else
                   3246:   {
                   3247:   md->nltype = NLTYPE_FIXED;
                   3248:   if (newline > 255)
                   3249:     {
                   3250:     md->nllen = 2;
                   3251:     md->nl[0] = (newline >> 8) & 255;
                   3252:     md->nl[1] = newline & 255;
                   3253:     }
                   3254:   else
                   3255:     {
                   3256:     md->nllen = 1;
                   3257:     md->nl[0] = newline;
                   3258:     }
                   3259:   }
                   3260: 
                   3261: /* Check a UTF string if required. Pass back the error offset and error code
                   3262: for an invalid string if the offsets vector has at least two slots. */
                   3263: 
1.1.1.2   misho    3264: #ifdef SUPPORT_UTF
                   3265: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
1.1       misho    3266:   {
                   3267:   int erroroffset;
1.1.1.2   misho    3268:   int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
1.1       misho    3269:   if (errorcode != 0)
                   3270:     {
                   3271:     if (offsetcount >= 2)
                   3272:       {
                   3273:       offsets[0] = erroroffset;
                   3274:       offsets[1] = errorcode;
                   3275:       }
1.1.1.4 ! misho    3276: #if defined COMPILE_PCRE8
        !          3277:     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
1.1       misho    3278:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
1.1.1.4 ! misho    3279: #elif defined COMPILE_PCRE16
        !          3280:     return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
        !          3281:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
        !          3282: #elif defined COMPILE_PCRE32
        !          3283:     return PCRE_ERROR_BADUTF32;
        !          3284: #endif
1.1       misho    3285:     }
1.1.1.4 ! misho    3286: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
1.1       misho    3287:   if (start_offset > 0 && start_offset < length &&
1.1.1.2   misho    3288:         NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
1.1       misho    3289:     return PCRE_ERROR_BADUTF8_OFFSET;
1.1.1.4 ! misho    3290: #endif
1.1       misho    3291:   }
                   3292: #endif
                   3293: 
                   3294: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   3295: is a feature that makes it possible to save compiled regex and re-use them
                   3296: in other programs later. */
                   3297: 
1.1.1.2   misho    3298: if (md->tables == NULL) md->tables = PRIV(default_tables);
1.1       misho    3299: 
1.1.1.2   misho    3300: /* The "must be at the start of a line" flags are used in a loop when finding
                   3301: where to start. */
1.1       misho    3302: 
                   3303: startline = (re->flags & PCRE_STARTLINE) != 0;
                   3304: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   3305: 
                   3306: /* Set up the first character to match, if available. The first_char value is
                   3307: never set for an anchored regular expression, but the anchoring may be forced
                   3308: at run time, so we have to test for anchoring. The first char may be unset for
                   3309: an unanchored pattern, of course. If there's no first char and the pattern was
                   3310: studied, there may be a bitmap of possible first characters. */
                   3311: 
                   3312: if (!anchored)
                   3313:   {
                   3314:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   3315:     {
1.1.1.2   misho    3316:     has_first_char = TRUE;
                   3317:     first_char = first_char2 = (pcre_uchar)(re->first_char);
                   3318:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
                   3319:       {
                   3320:       first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
                   3321: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   3322:       if (utf && first_char > 127)
                   3323:         first_char2 = UCD_OTHERCASE(first_char);
                   3324: #endif
                   3325:       }
1.1       misho    3326:     }
                   3327:   else
                   3328:     {
                   3329:     if (!startline && study != NULL &&
                   3330:          (study->flags & PCRE_STUDY_MAPPED) != 0)
                   3331:       start_bits = study->start_bits;
                   3332:     }
                   3333:   }
                   3334: 
                   3335: /* For anchored or unanchored matches, there may be a "last known required
                   3336: character" set. */
                   3337: 
                   3338: if ((re->flags & PCRE_REQCHSET) != 0)
                   3339:   {
1.1.1.2   misho    3340:   has_req_char = TRUE;
                   3341:   req_char = req_char2 = (pcre_uchar)(re->req_char);
                   3342:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
                   3343:     {
                   3344:     req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
                   3345: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   3346:     if (utf && req_char > 127)
                   3347:       req_char2 = UCD_OTHERCASE(req_char);
                   3348: #endif
                   3349:     }
1.1       misho    3350:   }
                   3351: 
                   3352: /* Call the main matching function, looping for a non-anchored regex after a
                   3353: failed match. If not restarting, perform certain optimizations at the start of
                   3354: a match. */
                   3355: 
                   3356: for (;;)
                   3357:   {
                   3358:   int rc;
                   3359: 
                   3360:   if ((options & PCRE_DFA_RESTART) == 0)
                   3361:     {
1.1.1.2   misho    3362:     const pcre_uchar *save_end_subject = end_subject;
1.1       misho    3363: 
                   3364:     /* If firstline is TRUE, the start of the match is constrained to the first
                   3365:     line of a multiline string. Implement this by temporarily adjusting
                   3366:     end_subject so that we stop scanning at a newline. If the match fails at
                   3367:     the newline, later code breaks this loop. */
                   3368: 
                   3369:     if (firstline)
                   3370:       {
1.1.1.2   misho    3371:       PCRE_PUCHAR t = current_subject;
                   3372: #ifdef SUPPORT_UTF
                   3373:       if (utf)
1.1       misho    3374:         {
                   3375:         while (t < md->end_subject && !IS_NEWLINE(t))
                   3376:           {
                   3377:           t++;
1.1.1.2   misho    3378:           ACROSSCHAR(t < end_subject, *t, t++);
1.1       misho    3379:           }
                   3380:         }
                   3381:       else
                   3382: #endif
                   3383:       while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   3384:       end_subject = t;
                   3385:       }
                   3386: 
                   3387:     /* There are some optimizations that avoid running the match if a known
                   3388:     starting point is not found. However, there is an option that disables
                   3389:     these, for testing and for ensuring that all callouts do actually occur.
                   3390:     The option can be set in the regex by (*NO_START_OPT) or passed in
                   3391:     match-time options. */
                   3392: 
                   3393:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   3394:       {
1.1.1.2   misho    3395:       /* Advance to a known first char. */
1.1       misho    3396: 
1.1.1.2   misho    3397:       if (has_first_char)
1.1       misho    3398:         {
1.1.1.2   misho    3399:         if (first_char != first_char2)
1.1.1.4 ! misho    3400:           {
        !          3401:           pcre_uchar csc;
1.1       misho    3402:           while (current_subject < end_subject &&
1.1.1.4 ! misho    3403:                  (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
1.1       misho    3404:             current_subject++;
1.1.1.4 ! misho    3405:           }
1.1       misho    3406:         else
                   3407:           while (current_subject < end_subject &&
1.1.1.4 ! misho    3408:                  RAWUCHARTEST(current_subject) != first_char)
1.1       misho    3409:             current_subject++;
                   3410:         }
                   3411: 
                   3412:       /* Or to just after a linebreak for a multiline match if possible */
                   3413: 
                   3414:       else if (startline)
                   3415:         {
                   3416:         if (current_subject > md->start_subject + start_offset)
                   3417:           {
1.1.1.2   misho    3418: #ifdef SUPPORT_UTF
                   3419:           if (utf)
1.1       misho    3420:             {
                   3421:             while (current_subject < end_subject &&
                   3422:                    !WAS_NEWLINE(current_subject))
                   3423:               {
                   3424:               current_subject++;
1.1.1.2   misho    3425:               ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3426:                 current_subject++);
1.1       misho    3427:               }
                   3428:             }
                   3429:           else
                   3430: #endif
                   3431:           while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
                   3432:             current_subject++;
                   3433: 
                   3434:           /* If we have just passed a CR and the newline option is ANY or
                   3435:           ANYCRLF, and we are now at a LF, advance the match position by one
                   3436:           more character. */
                   3437: 
1.1.1.4 ! misho    3438:           if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
1.1       misho    3439:                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   3440:                current_subject < end_subject &&
1.1.1.4 ! misho    3441:                RAWUCHARTEST(current_subject) == CHAR_NL)
1.1       misho    3442:             current_subject++;
                   3443:           }
                   3444:         }
                   3445: 
                   3446:       /* Or to a non-unique first char after study */
                   3447: 
                   3448:       else if (start_bits != NULL)
                   3449:         {
                   3450:         while (current_subject < end_subject)
                   3451:           {
1.1.1.4 ! misho    3452:           register pcre_uint32 c = RAWUCHARTEST(current_subject);
1.1.1.2   misho    3453: #ifndef COMPILE_PCRE8
                   3454:           if (c > 255) c = 255;
                   3455: #endif
1.1       misho    3456:           if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   3457:             {
                   3458:             current_subject++;
1.1.1.2   misho    3459: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
                   3460:             /* In non 8-bit mode, the iteration will stop for
                   3461:             characters > 255 at the beginning or not stop at all. */
                   3462:             if (utf)
                   3463:               ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3464:                 current_subject++);
1.1       misho    3465: #endif
                   3466:             }
                   3467:           else break;
                   3468:           }
                   3469:         }
                   3470:       }
                   3471: 
                   3472:     /* Restore fudged end_subject */
                   3473: 
                   3474:     end_subject = save_end_subject;
                   3475: 
                   3476:     /* The following two optimizations are disabled for partial matching or if
                   3477:     disabling is explicitly requested (and of course, by the test above, this
                   3478:     code is not obeyed when restarting after a partial match). */
                   3479: 
                   3480:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
                   3481:         (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
                   3482:       {
                   3483:       /* If the pattern was studied, a minimum subject length may be set. This
                   3484:       is a lower bound; there may be no string of that length that matches the
                   3485:       pattern. Although the value is, strictly, in characters, we treat it as
                   3486:       code units to avoid spending too much time in this optimization. */
                   3487: 
                   3488:       if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   3489:           (pcre_uint32)(end_subject - current_subject) < study->minlength)
                   3490:         return PCRE_ERROR_NOMATCH;
                   3491: 
1.1.1.2   misho    3492:       /* If req_char is set, we know that that character must appear in the
                   3493:       subject for the match to succeed. If the first character is set, req_char
1.1       misho    3494:       must be later in the subject; otherwise the test starts at the match
                   3495:       point. This optimization can save a huge amount of work in patterns with
                   3496:       nested unlimited repeats that aren't going to match. Writing separate
                   3497:       code for cased/caseless versions makes it go faster, as does using an
                   3498:       autoincrement and backing off on a match.
                   3499: 
                   3500:       HOWEVER: when the subject string is very, very long, searching to its end
                   3501:       can take a long time, and give bad performance on quite ordinary
                   3502:       patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
                   3503:       string... so we don't do this when the string is sufficiently long. */
                   3504: 
1.1.1.2   misho    3505:       if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
1.1       misho    3506:         {
1.1.1.2   misho    3507:         register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
1.1       misho    3508: 
                   3509:         /* We don't need to repeat the search if we haven't yet reached the
                   3510:         place we found it at last time. */
                   3511: 
1.1.1.2   misho    3512:         if (p > req_char_ptr)
1.1       misho    3513:           {
1.1.1.2   misho    3514:           if (req_char != req_char2)
1.1       misho    3515:             {
                   3516:             while (p < end_subject)
                   3517:               {
1.1.1.4 ! misho    3518:               register pcre_uint32 pp = RAWUCHARINCTEST(p);
1.1.1.2   misho    3519:               if (pp == req_char || pp == req_char2) { p--; break; }
1.1       misho    3520:               }
                   3521:             }
                   3522:           else
                   3523:             {
                   3524:             while (p < end_subject)
                   3525:               {
1.1.1.4 ! misho    3526:               if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
1.1       misho    3527:               }
                   3528:             }
                   3529: 
                   3530:           /* If we can't find the required character, break the matching loop,
                   3531:           which will cause a return of PCRE_ERROR_NOMATCH. */
                   3532: 
                   3533:           if (p >= end_subject) break;
                   3534: 
                   3535:           /* If we have found the required character, save the point where we
                   3536:           found it, so that we don't search again next time round the loop if
                   3537:           the start hasn't passed this character yet. */
                   3538: 
1.1.1.2   misho    3539:           req_char_ptr = p;
1.1       misho    3540:           }
                   3541:         }
                   3542:       }
                   3543:     }   /* End of optimizations that are done when not restarting */
                   3544: 
                   3545:   /* OK, now we can do the business */
                   3546: 
                   3547:   md->start_used_ptr = current_subject;
                   3548:   md->recursive = NULL;
                   3549: 
                   3550:   rc = internal_dfa_exec(
                   3551:     md,                                /* fixed match data */
                   3552:     md->start_code,                    /* this subexpression's code */
                   3553:     current_subject,                   /* where we currently are */
                   3554:     start_offset,                      /* start offset in subject */
                   3555:     offsets,                           /* offset vector */
                   3556:     offsetcount,                       /* size of same */
                   3557:     workspace,                         /* workspace vector */
                   3558:     wscount,                           /* size of same */
                   3559:     0);                                /* function recurse level */
                   3560: 
                   3561:   /* Anything other than "no match" means we are done, always; otherwise, carry
                   3562:   on only if not anchored. */
                   3563: 
1.1.1.4 ! misho    3564:   if (rc != PCRE_ERROR_NOMATCH || anchored)
        !          3565:     {
        !          3566:     if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
        !          3567:       {
        !          3568:       offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
        !          3569:       offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
        !          3570:       if (offsetcount > 2)
        !          3571:         offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
        !          3572:       }
        !          3573:     return rc;
        !          3574:     }
1.1       misho    3575: 
                   3576:   /* Advance to the next subject character unless we are at the end of a line
                   3577:   and firstline is set. */
                   3578: 
                   3579:   if (firstline && IS_NEWLINE(current_subject)) break;
                   3580:   current_subject++;
1.1.1.2   misho    3581: #ifdef SUPPORT_UTF
                   3582:   if (utf)
1.1       misho    3583:     {
1.1.1.2   misho    3584:     ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3585:       current_subject++);
1.1       misho    3586:     }
1.1.1.2   misho    3587: #endif
1.1       misho    3588:   if (current_subject > end_subject) break;
                   3589: 
                   3590:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   3591:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   3592:   or ANY or ANYCRLF, advance the match position by one more character. */
                   3593: 
1.1.1.4 ! misho    3594:   if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
1.1       misho    3595:       current_subject < end_subject &&
1.1.1.4 ! misho    3596:       RAWUCHARTEST(current_subject) == CHAR_NL &&
1.1       misho    3597:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   3598:         (md->nltype == NLTYPE_ANY ||
                   3599:          md->nltype == NLTYPE_ANYCRLF ||
                   3600:          md->nllen == 2))
                   3601:     current_subject++;
                   3602: 
                   3603:   }   /* "Bumpalong" loop */
                   3604: 
                   3605: return PCRE_ERROR_NOMATCH;
                   3606: }
                   3607: 
                   3608: /* End of pcre_dfa_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>