embedaddon/pcre/pcre_dfa_exec.c - annotate

Return to pcre_dfa_exec.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre
Annotation of embedaddon/pcre/pcre_dfa_exec.c, revision 1.1.1.3

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language (but see
                      7: below for why this module is different).
                      8: 
                      9:                        Written by Philip Hazel
1.1.1.2   misho      10:            Copyright (c) 1997-2012 University of Cambridge
1.1       misho      11: 
                     12: -----------------------------------------------------------------------------
                     13: Redistribution and use in source and binary forms, with or without
                     14: modification, are permitted provided that the following conditions are met:
                     15: 
                     16:     * Redistributions of source code must retain the above copyright notice,
                     17:       this list of conditions and the following disclaimer.
                     18: 
                     19:     * Redistributions in binary form must reproduce the above copyright
                     20:       notice, this list of conditions and the following disclaimer in the
                     21:       documentation and/or other materials provided with the distribution.
                     22: 
                     23:     * Neither the name of the University of Cambridge nor the names of its
                     24:       contributors may be used to endorse or promote products derived from
                     25:       this software without specific prior written permission.
                     26: 
                     27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37: POSSIBILITY OF SUCH DAMAGE.
                     38: -----------------------------------------------------------------------------
                     39: */
                     40: 
                     41: /* This module contains the external function pcre_dfa_exec(), which is an
                     42: alternative matching function that uses a sort of DFA algorithm (not a true
1.1.1.3 ! misho      43: FSM). This is NOT Perl-compatible, but it has advantages in certain
1.1       misho      44: applications. */
                     45: 
                     46: 
                     47: /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
                     48: the performance of his patterns greatly. I could not use it as it stood, as it
                     49: was not thread safe, and made assumptions about pattern sizes. Also, it caused
                     50: test 7 to loop, and test 9 to crash with a segfault.
                     51: 
                     52: The issue is the check for duplicate states, which is done by a simple linear
                     53: search up the state list. (Grep for "duplicate" below to find the code.) For
                     54: many patterns, there will never be many states active at one time, so a simple
                     55: linear search is fine. In patterns that have many active states, it might be a
                     56: bottleneck. The suggested code used an indexing scheme to remember which states
                     57: had previously been used for each character, and avoided the linear search when
                     58: it knew there was no chance of a duplicate. This was implemented when adding
                     59: states to the state lists.
                     60: 
                     61: I wrote some thread-safe, not-limited code to try something similar at the time
                     62: of checking for duplicates (instead of when adding states), using index vectors
                     63: on the stack. It did give a 13% improvement with one specially constructed
                     64: pattern for certain subject strings, but on other strings and on many of the
                     65: simpler patterns in the test suite it did worse. The major problem, I think,
                     66: was the extra time to initialize the index. This had to be done for each call
                     67: of internal_dfa_exec(). (The supplied patch used a static vector, initialized
                     68: only once - I suspect this was the cause of the problems with the tests.)
                     69: 
                     70: Overall, I concluded that the gains in some cases did not outweigh the losses
                     71: in others, so I abandoned this code. */
                     72: 
                     73: 
                     74: 
                     75: #ifdef HAVE_CONFIG_H
                     76: #include "config.h"
                     77: #endif
                     78: 
                     79: #define NLBLOCK md             /* Block containing newline information */
                     80: #define PSSTART start_subject  /* Field containing processed string start */
                     81: #define PSEND   end_subject    /* Field containing processed string end */
                     82: 
                     83: #include "pcre_internal.h"
                     84: 
                     85: 
                     86: /* Run of spaces used to indent debugging output: the DPRINTF calls below
                     87: print a prefix of it via "%.*s", with a width of rlevel*2-2. */
                     88: #define SP "                   "
                     89: 
                     90: 
                     91: /*************************************************
                     92: *      Code parameters and static tables         *
                     93: *************************************************/
                     94: 
                     95: /* These are offsets that are used to turn OP_TYPESTAR and its sibling opcodes
                     96: into other opcode values, under special conditions. The blocks start at 300
                     97: and are spaced 20 apart, which should be enough. The resulting opcodes do not
                     98: have to be less than 256 because they are never stored, so they are pushed
                     99: well clear of the normal opcodes. */
                    100: #define OP_PROP_EXTRA       300
                    101: #define OP_EXTUNI_EXTRA     320
                    102: #define OP_ANYNL_EXTRA      340
                    103: #define OP_HSPACE_EXTRA     360
                    104: #define OP_VSPACE_EXTRA     380
                    105: 
                    106: 
                    107: /* This table identifies those opcodes that are followed immediately by a
                    108: character that is to be tested in some way, which makes it possible to
                    109: centralize the loading of these characters. For Type * etc, the "character" is
                    110: the opcode for \D, \d, \S, \s, \W, or \w, which will always be a small value.
                    111: There is one entry per opcode, in the order named by the row comments: a
                    112: non-zero entry is the offset from the opcode at which the character is found,
                    113: and zero marks an opcode with no such character. ***NOTE*** If the start of
                    114: this table is modified, the three tables that follow must also be modified. */
1.1.1.2   misho     115: static const pcre_uint8 coptable[] = {
1.1       misho     116:   0,                             /* End                                    */
                    117:   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
                    118:   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
                    119:   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
                    120:   0, 0,                          /* \P, \p                                 */
                    121:   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
                    122:   0,                             /* \X                                     */
                    123:   0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
                    124:   1,                             /* Char                                   */
                    125:   1,                             /* Chari                                  */
                    126:   1,                             /* not                                    */
                    127:   1,                             /* noti                                   */
                    128:   /* Positive single-char repeats                                          */
                    129:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
1.1.1.2   misho     130:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
                    131:   1+IMM2_SIZE,                   /* exact                                  */
                    132:   1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
1.1       misho     133:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
1.1.1.2   misho     134:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
                    135:   1+IMM2_SIZE,                   /* exact I                                */
                    136:   1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
1.1       misho     137:   /* Negative single-char repeats - only for chars < 256                   */
                    138:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
1.1.1.2   misho     139:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
                    140:   1+IMM2_SIZE,                   /* NOT exact                              */
                    141:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
1.1       misho     142:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
1.1.1.2   misho     143:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
                    144:   1+IMM2_SIZE,                   /* NOT exact I                            */
                    145:   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
1.1       misho     146:   /* Positive type repeats                                                 */
                    147:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
1.1.1.2   misho     148:   1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
                    149:   1+IMM2_SIZE,                   /* Type exact                             */
                    150:   1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
1.1       misho     151:   /* Character class & ref repeats                                         */
                    152:   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
                    153:   0, 0,                          /* CRRANGE, CRMINRANGE                    */
                    154:   0,                             /* CLASS                                  */
                    155:   0,                             /* NCLASS                                 */
                    156:   0,                             /* XCLASS - variable length               */
                    157:   0,                             /* REF                                    */
                    158:   0,                             /* REFI                                   */
                    159:   0,                             /* RECURSE                                */
                    160:   0,                             /* CALLOUT                                */
                    161:   0,                             /* Alt                                    */
                    162:   0,                             /* Ket                                    */
                    163:   0,                             /* KetRmax                                */
                    164:   0,                             /* KetRmin                                */
                    165:   0,                             /* KetRpos                                */
                    166:   0,                             /* Reverse                                */
                    167:   0,                             /* Assert                                 */
                    168:   0,                             /* Assert not                             */
                    169:   0,                             /* Assert behind                          */
                    170:   0,                             /* Assert behind not                      */
                    171:   0, 0,                          /* ONCE, ONCE_NC                          */
                    172:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    173:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
                    174:   0, 0,                          /* CREF, NCREF                            */
                    175:   0, 0,                          /* RREF, NRREF                            */
                    176:   0,                             /* DEF                                    */
                    177:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    178:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    179:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    180:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    181:   0, 0                           /* CLOSE, SKIPZERO                        */
                    182: };
                    183: 
                    184: /* This table identifies those opcodes that inspect a character. It is used to
                    185: remember the fact that a character could have been inspected when the end of
                    186: the subject is reached. It has one entry per opcode, in the order named by the
                    187: row comments, with 1 marking an inspecting opcode. ***NOTE*** If the start of
                    188: this table is modified, the two tables that follow must also be modified. */
1.1.1.2   misho     189: static const pcre_uint8 poptable[] = {
1.1       misho     190:   0,                             /* End                                    */
                    191:   0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
                    192:   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
                    193:   1, 1, 1,                       /* Any, AllAny, Anybyte                   */
                    194:   1, 1,                          /* \P, \p                                 */
                    195:   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
                    196:   1,                             /* \X                                     */
                    197:   0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
                    198:   1,                             /* Char                                   */
                    199:   1,                             /* Chari                                  */
                    200:   1,                             /* not                                    */
                    201:   1,                             /* noti                                   */
                    202:   /* Positive single-char repeats                                          */
                    203:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    204:   1, 1, 1,                       /* upto, minupto, exact                   */
                    205:   1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
                    206:   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
                    207:   1, 1, 1,                       /* upto I, minupto I, exact I             */
                    208:   1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
                    209:   /* Negative single-char repeats - only for chars < 256                   */
                    210:   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
                    211:   1, 1, 1,                       /* NOT upto, minupto, exact               */
                    212:   1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
                    213:   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
                    214:   1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
                    215:   1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
                    216:   /* Positive type repeats                                                 */
                    217:   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
                    218:   1, 1, 1,                       /* Type upto, minupto, exact              */
                    219:   1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
                    220:   /* Character class & ref repeats                                         */
                    221:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
                    222:   1, 1,                          /* CRRANGE, CRMINRANGE                    */
                    223:   1,                             /* CLASS                                  */
                    224:   1,                             /* NCLASS                                 */
                    225:   1,                             /* XCLASS - variable length               */
                    226:   0,                             /* REF                                    */
                    227:   0,                             /* REFI                                   */
                    228:   0,                             /* RECURSE                                */
                    229:   0,                             /* CALLOUT                                */
                    230:   0,                             /* Alt                                    */
                    231:   0,                             /* Ket                                    */
                    232:   0,                             /* KetRmax                                */
                    233:   0,                             /* KetRmin                                */
                    234:   0,                             /* KetRpos                                */
                    235:   0,                             /* Reverse                                */
                    236:   0,                             /* Assert                                 */
                    237:   0,                             /* Assert not                             */
                    238:   0,                             /* Assert behind                          */
                    239:   0,                             /* Assert behind not                      */
                    240:   0, 0,                          /* ONCE, ONCE_NC                          */
                    241:   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
                    242:   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
                    243:   0, 0,                          /* CREF, NCREF                            */
                    244:   0, 0,                          /* RREF, NRREF                            */
                    245:   0,                             /* DEF                                    */
                    246:   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
                    247:   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
                    248:   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
                    249:   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
                    250:   0, 0                           /* CLOSE, SKIPZERO                        */
                    251: };
                    252: 
                    253: /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
                    254: and \w. Both follow the opcode order at the start of the tables above: six zero
                    255: entries, then \D, \d, \S, \s, \W, \w, and finally OP_ANY and OP_ALLANY. */
1.1.1.2   misho     256: static const pcre_uint8 toptable1[] = {
1.1       misho     257:   0, 0, 0, 0, 0, 0,
                    258:   ctype_digit, ctype_digit,
                    259:   ctype_space, ctype_space,
                    260:   ctype_word,  ctype_word,
                    261:   0, 0                            /* OP_ANY, OP_ALLANY */
                    262: };
                    263: /* Same layout as toptable1, but \d, \s, \w hold 0 and OP_ANY, OP_ALLANY hold 1 */
1.1.1.2   misho     264: static const pcre_uint8 toptable2[] = {
1.1       misho     265:   0, 0, 0, 0, 0, 0,
                    266:   ctype_digit, 0,
                    267:   ctype_space, 0,
                    268:   ctype_word,  0,
                    269:   1, 1                            /* OP_ANY, OP_ALLANY */
                    270: };
                    271: 
                    272: 
                    273: /* Structure for holding data about a particular state, which is in effect the
                    274: current data for an active path through the match tree. It must consist
                    275: entirely of ints because the working vector we are passed, and which we put
                    276: these structures in, is a vector of ints. */
                    277: 
                    278: typedef struct stateblock {
                    279:   int offset;                     /* Offset to opcode */
                    280:   int count;                      /* Count for repeats */
                    281:   int data;                       /* Extra data; stored by the ADD_*_DATA macros */
                    282: } stateblock;
                    283: /* Number of ints in one stateblock; converts workspace ints to a state count */
1.1.1.3 ! misho     284: #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
1.1       misho     285: 
                    286: 
                    287: #ifdef PCRE_DEBUG
                    288: /*************************************************
                    289: *             Print character string             *
                    290: *************************************************/
                    291: 
                    292: /* Character string printing function for debugging. Characters for which
                    293: isprint() is true are written as themselves, all others as \x and hex digits.
                    294: Arguments:
                    295:   p            points to string
                    296:   length       number of pcre_uchar units to print (nothing if <= 0)
                    297:   f            where to print
                    298: NOTE(review): if pcre_uchar can exceed 8 bits, isprint() may see values > 255.
                    299: Returns:       nothing
                    300: */
                    301: 
                    302: static void
1.1.1.2   misho     303: pchars(const pcre_uchar *p, int length, FILE *f)
1.1       misho     304: {
                    305: int c;                          /* Current unit, widened to int */
                    306: while (length-- > 0)
                    307:   {
                    308:   if (isprint(c = *(p++)))      /* Fetch the next unit and advance */
                    309:     fprintf(f, "%c", c);
                    310:   else
                    311:     fprintf(f, "\\x%02x", c);   /* At least two hex digits */
                    312:   }
                    313: }
                    314: #endif
                    315: 
                    316: 
                    317: 
                    318: /*************************************************
                    319: *    Execute a Regular Expression - DFA engine   *
                    320: *************************************************/
                    321: 
                    322: /* This internal function applies a compiled pattern to a subject string,
                    323: starting at a given point, using a DFA engine. This function is called from the
                    324: external one, possibly multiple times if the pattern is not anchored. The
                    325: function calls itself recursively for some kinds of subpattern.
                    326: 
                    327: Arguments:
                    328:   md                the match_data block with fixed information
                    329:   this_start_code   the opening bracket of this subexpression's code
                    330:   current_subject   where we currently are in the subject string
                    331:   start_offset      start offset in the subject string
                    332:   offsets           vector to contain the matching string offsets
                    333:   offsetcount       size of same
                    334:   workspace         vector of workspace
                    335:   wscount           size of same
                    336:   rlevel            function call recursion level
                    337: 
                    338: Returns:            > 0 => number of match offset pairs placed in offsets
                    339:                     = 0 => offsets overflowed; longest matches are present
                    340:                      -1 => failed to match
                    341:                    < -1 => some kind of unexpected problem
                    342: 
                    343: The following macros are used for adding states to the two state vectors (one
                    344: for the current character, one for the following character). */
                    345: /* Append (offset x, count y) to the active list, or return PCRE_ERROR_DFA_WSSIZE if full */
                    346: #define ADD_ACTIVE(x,y) \
                    347:   if (active_count++ < wscount) \
                    348:     { \
                    349:     next_active_state->offset = (x); \
                    350:     next_active_state->count  = (y); \
                    351:     next_active_state++; \
                    352:     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    353:     } \
                    354:   else return PCRE_ERROR_DFA_WSSIZE
                    355: /* As ADD_ACTIVE, but also stores z in the new state's data field */
                    356: #define ADD_ACTIVE_DATA(x,y,z) \
                    357:   if (active_count++ < wscount) \
                    358:     { \
                    359:     next_active_state->offset = (x); \
                    360:     next_active_state->count  = (y); \
                    361:     next_active_state->data   = (z); \
                    362:     next_active_state++; \
                    363:     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
                    364:     } \
                    365:   else return PCRE_ERROR_DFA_WSSIZE
                    366: /* Append (offset x, count y) to the new-state list, or return PCRE_ERROR_DFA_WSSIZE if full */
                    367: #define ADD_NEW(x,y) \
                    368:   if (new_count++ < wscount) \
                    369:     { \
                    370:     next_new_state->offset = (x); \
                    371:     next_new_state->count  = (y); \
                    372:     next_new_state++; \
                    373:     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
                    374:     } \
                    375:   else return PCRE_ERROR_DFA_WSSIZE
                    376: /* As ADD_NEW, plus data = z. NOTE: expands to a bare if/else; brace it inside an outer if */
                    377: #define ADD_NEW_DATA(x,y,z) \
                    378:   if (new_count++ < wscount) \
                    379:     { \
                    380:     next_new_state->offset = (x); \
                    381:     next_new_state->count  = (y); \
                    382:     next_new_state->data   = (z); \
                    383:     next_new_state++; \
1.1.1.3 ! misho     384:     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
        !           385:       (x), (y), (z), __LINE__)); \
1.1       misho     386:     } \
                    387:   else return PCRE_ERROR_DFA_WSSIZE
                    388: 
                    389: /* And now, here is the code */
                    390: 
                    391: static int
                    392: internal_dfa_exec(
                    393:   dfa_match_data *md,
1.1.1.2   misho     394:   const pcre_uchar *this_start_code,
                    395:   const pcre_uchar *current_subject,
1.1       misho     396:   int start_offset,
                    397:   int *offsets,
                    398:   int offsetcount,
                    399:   int *workspace,
                    400:   int wscount,
                    401:   int  rlevel)
                    402: {
                    403: stateblock *active_states, *new_states, *temp_states;
                    404: stateblock *next_active_state, *next_new_state;
                    405: 
1.1.1.2   misho     406: const pcre_uint8 *ctypes, *lcc, *fcc;
                    407: const pcre_uchar *ptr;
                    408: const pcre_uchar *end_code, *first_op;
1.1       misho     409: 
                    410: dfa_recursion_info new_recursive;
                    411: 
                    412: int active_count, new_count, match_count;
                    413: 
                    414: /* Some fields in the md block are frequently referenced, so we load them into
                    415: independent variables in the hope that this will perform better. */
                    416: 
1.1.1.2   misho     417: const pcre_uchar *start_subject = md->start_subject;
                    418: const pcre_uchar *end_subject = md->end_subject;
                    419: const pcre_uchar *start_code = md->start_code;
1.1       misho     420: 
1.1.1.2   misho     421: #ifdef SUPPORT_UTF
                    422: BOOL utf = (md->poptions & PCRE_UTF8) != 0;
1.1       misho     423: #else
1.1.1.2   misho     424: BOOL utf = FALSE;
1.1       misho     425: #endif
                    426: 
1.1.1.3 ! misho     427: BOOL reset_could_continue = FALSE;
        !           428: 
1.1       misho     429: rlevel++;
                    430: offsetcount &= (-2);
                    431: 
                    432: wscount -= 2;
                    433: wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
                    434:           (2 * INTS_PER_STATEBLOCK);
                    435: 
                    436: DPRINTF(("\n%.*s---------------------\n"
                    437:   "%.*sCall to internal_dfa_exec f=%d\n",
                    438:   rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
                    439: 
                    440: ctypes = md->tables + ctypes_offset;
                    441: lcc = md->tables + lcc_offset;
                    442: fcc = md->tables + fcc_offset;
                    443: 
                    444: match_count = PCRE_ERROR_NOMATCH;   /* A negative number */
                    445: 
                    446: active_states = (stateblock *)(workspace + 2);
                    447: next_new_state = new_states = active_states + wscount;
                    448: new_count = 0;
                    449: 
                    450: first_op = this_start_code + 1 + LINK_SIZE +
                    451:   ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
1.1.1.2   misho     452:     *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
                    453:     ? IMM2_SIZE:0);
1.1       misho     454: 
                    455: /* The first thing in any (sub) pattern is a bracket of some sort. Push all
                    456: the alternative states onto the list, and find out where the end is. This
                    457: makes it possible to use this function recursively, when we want to stop at a
                    458: matching internal ket rather than at the end.
                    459: 
                    460: If the first opcode in the first alternative is OP_REVERSE, we are dealing with
                    461: a backward assertion. In that case, we have to find out the maximum amount to
                    462: move back, and set up each alternative appropriately. */
                    463: 
                    464: if (*first_op == OP_REVERSE)
                    465:   {
                    466:   int max_back = 0;
                    467:   int gone_back;
                    468: 
                    469:   end_code = this_start_code;
                    470:   do
                    471:     {
                    472:     int back = GET(end_code, 2+LINK_SIZE);
                    473:     if (back > max_back) max_back = back;
                    474:     end_code += GET(end_code, 1);
                    475:     }
                    476:   while (*end_code == OP_ALT);
                    477: 
                    478:   /* If we can't go back the amount required for the longest lookbehind
                    479:   pattern, go back as far as we can; some alternatives may still be viable. */
                    480: 
1.1.1.2   misho     481: #ifdef SUPPORT_UTF
1.1       misho     482:   /* In character mode we have to step back character by character */
                    483: 
1.1.1.2   misho     484:   if (utf)
1.1       misho     485:     {
                    486:     for (gone_back = 0; gone_back < max_back; gone_back++)
                    487:       {
                    488:       if (current_subject <= start_subject) break;
                    489:       current_subject--;
1.1.1.2   misho     490:       ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
1.1       misho     491:       }
                    492:     }
                    493:   else
                    494: #endif
                    495: 
                    496:   /* In byte-mode we can do this quickly. */
                    497: 
                    498:     {
                    499:     gone_back = (current_subject - max_back < start_subject)?
                    500:       (int)(current_subject - start_subject) : max_back;
                    501:     current_subject -= gone_back;
                    502:     }
                    503: 
                    504:   /* Save the earliest consulted character */
                    505: 
                    506:   if (current_subject < md->start_used_ptr)
                    507:     md->start_used_ptr = current_subject;
                    508: 
                    509:   /* Now we can process the individual branches. */
                    510: 
                    511:   end_code = this_start_code;
                    512:   do
                    513:     {
                    514:     int back = GET(end_code, 2+LINK_SIZE);
                    515:     if (back <= gone_back)
                    516:       {
                    517:       int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
                    518:       ADD_NEW_DATA(-bstate, 0, gone_back - back);
                    519:       }
                    520:     end_code += GET(end_code, 1);
                    521:     }
                    522:   while (*end_code == OP_ALT);
                    523:  }
                    524: 
                    525: /* This is the code for a "normal" subpattern (not a backward assertion). The
                    526: start of a whole pattern is always one of these. If we are at the top level,
                    527: we may be asked to restart matching from the same point that we reached for a
                    528: previous partial match. We still have to scan through the top-level branches to
                    529: find the end state. */
                    530: 
                    531: else
                    532:   {
                    533:   end_code = this_start_code;
                    534: 
                    535:   /* Restarting */
                    536: 
                    537:   if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
                    538:     {
                    539:     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
                    540:     new_count = workspace[1];
                    541:     if (!workspace[0])
                    542:       memcpy(new_states, active_states, new_count * sizeof(stateblock));
                    543:     }
                    544: 
                    545:   /* Not restarting */
                    546: 
                    547:   else
                    548:     {
                    549:     int length = 1 + LINK_SIZE +
                    550:       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
1.1.1.2   misho     551:         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
                    552:         ? IMM2_SIZE:0);
1.1       misho     553:     do
                    554:       {
                    555:       ADD_NEW((int)(end_code - start_code + length), 0);
                    556:       end_code += GET(end_code, 1);
                    557:       length = 1 + LINK_SIZE;
                    558:       }
                    559:     while (*end_code == OP_ALT);
                    560:     }
                    561:   }
                    562: 
                    563: workspace[0] = 0;    /* Bit indicating which vector is current */
                    564: 
1.1.1.2   misho     565: DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
1.1       misho     566: 
                    567: /* Loop for scanning the subject */
                    568: 
                    569: ptr = current_subject;
                    570: for (;;)
                    571:   {
                    572:   int i, j;
                    573:   int clen, dlen;
                    574:   unsigned int c, d;
                    575:   int forced_fail = 0;
1.1.1.3 ! misho     576:   BOOL partial_newline = FALSE;
        !           577:   BOOL could_continue = reset_could_continue;
        !           578:   reset_could_continue = FALSE;
1.1       misho     579: 
                    580:   /* Make the new state list into the active state list and empty the
                    581:   new state list. */
                    582: 
                    583:   temp_states = active_states;
                    584:   active_states = new_states;
                    585:   new_states = temp_states;
                    586:   active_count = new_count;
                    587:   new_count = 0;
                    588: 
                    589:   workspace[0] ^= 1;              /* Remember for the restarting feature */
                    590:   workspace[1] = active_count;
                    591: 
                    592: #ifdef PCRE_DEBUG
                    593:   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
1.1.1.2   misho     594:   pchars(ptr, STRLEN_UC(ptr), stdout);
1.1       misho     595:   printf("\"\n");
                    596: 
                    597:   printf("%.*sActive states: ", rlevel*2-2, SP);
                    598:   for (i = 0; i < active_count; i++)
                    599:     printf("%d/%d ", active_states[i].offset, active_states[i].count);
                    600:   printf("\n");
                    601: #endif
                    602: 
                    603:   /* Set the pointers for adding new states */
                    604: 
                    605:   next_active_state = active_states + active_count;
                    606:   next_new_state = new_states;
                    607: 
                    608:   /* Load the current character from the subject outside the loop, as many
                    609:   different states may want to look at it, and we assume that at least one
                    610:   will. */
                    611: 
                    612:   if (ptr < end_subject)
                    613:     {
1.1.1.3 ! misho     614:     clen = 1;        /* Number of data items in the character */
1.1.1.2   misho     615: #ifdef SUPPORT_UTF
                    616:     if (utf) { GETCHARLEN(c, ptr, clen); } else
                    617: #endif  /* SUPPORT_UTF */
1.1       misho     618:     c = *ptr;
                    619:     }
                    620:   else
                    621:     {
                    622:     clen = 0;        /* This indicates the end of the subject */
                    623:     c = NOTACHAR;    /* This value should never actually be used */
                    624:     }
                    625: 
                    626:   /* Scan up the active states and act on each one. The result of an action
                    627:   may be to add more states to the currently active list (e.g. on hitting a
                    628:   parenthesis) or it may be to put states on the new list, for considering
                    629:   when we move the character pointer on. */
                    630: 
                    631:   for (i = 0; i < active_count; i++)
                    632:     {
                    633:     stateblock *current_state = active_states + i;
                    634:     BOOL caseless = FALSE;
1.1.1.2   misho     635:     const pcre_uchar *code;
1.1       misho     636:     int state_offset = current_state->offset;
                    637:     int count, codevalue, rrc;
                    638: 
                    639: #ifdef PCRE_DEBUG
                    640:     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
                    641:     if (clen == 0) printf("EOL\n");
                    642:       else if (c > 32 && c < 127) printf("'%c'\n", c);
                    643:         else printf("0x%02x\n", c);
                    644: #endif
                    645: 
                    646:     /* A negative offset is a special case meaning "hold off going to this
                    647:     (negated) state until the number of characters in the data field have
1.1.1.3 ! misho     648:     been skipped". If the could_continue flag was passed over from a previous
        !           649:     state, arrange for it to be passed on. */
1.1       misho     650: 
                    651:     if (state_offset < 0)
                    652:       {
                    653:       if (current_state->data > 0)
                    654:         {
                    655:         DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
                    656:         ADD_NEW_DATA(state_offset, current_state->count,
                    657:           current_state->data - 1);
1.1.1.3 ! misho     658:         if (could_continue) reset_could_continue = TRUE;
1.1       misho     659:         continue;
                    660:         }
                    661:       else
                    662:         {
                    663:         current_state->offset = state_offset = -state_offset;
                    664:         }
                    665:       }
                    666: 
                    667:     /* Check for a duplicate state with the same count, and skip if found.
                    668:     See the note at the head of this module about the possibility of improving
                    669:     performance here. */
                    670: 
                    671:     for (j = 0; j < i; j++)
                    672:       {
                    673:       if (active_states[j].offset == state_offset &&
                    674:           active_states[j].count == current_state->count)
                    675:         {
                    676:         DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
                    677:         goto NEXT_ACTIVE_STATE;
                    678:         }
                    679:       }
                    680: 
                    681:     /* The state offset is the offset to the opcode */
                    682: 
                    683:     code = start_code + state_offset;
                    684:     codevalue = *code;
                    685: 
                    686:     /* If this opcode inspects a character, but we are at the end of the
                    687:     subject, remember the fact for use when testing for a partial match. */
                    688: 
                    689:     if (clen == 0 && poptable[codevalue] != 0)
                    690:       could_continue = TRUE;
                    691: 
                    692:     /* If this opcode is followed by an inline character, load it. It is
                    693:     tempting to test for the presence of a subject character here, but that
                    694:     is wrong, because sometimes zero repetitions of the subject are
                    695:     permitted.
                    696: 
                    697:     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
1.1.1.3 ! misho     698:     argument that is not a data character - but is always one byte long because
        !           699:     the values are small. We have to take special action to deal with  \P, \p,
        !           700:     \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
        !           701:     these ones to new opcodes. */
1.1       misho     702: 
                    703:     if (coptable[codevalue] > 0)
                    704:       {
                    705:       dlen = 1;
1.1.1.2   misho     706: #ifdef SUPPORT_UTF
                    707:       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
                    708: #endif  /* SUPPORT_UTF */
1.1       misho     709:       d = code[coptable[codevalue]];
                    710:       if (codevalue >= OP_TYPESTAR)
                    711:         {
                    712:         switch(d)
                    713:           {
                    714:           case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
                    715:           case OP_NOTPROP:
                    716:           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
                    717:           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
                    718:           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
                    719:           case OP_NOT_HSPACE:
                    720:           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
                    721:           case OP_NOT_VSPACE:
                    722:           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
                    723:           default: break;
                    724:           }
                    725:         }
                    726:       }
                    727:     else
                    728:       {
                    729:       dlen = 0;         /* Not strictly necessary, but compilers moan */
                    730:       d = NOTACHAR;     /* if these variables are not set. */
                    731:       }
                    732: 
                    733: 
                    734:     /* Now process the individual opcodes */
                    735: 
                    736:     switch (codevalue)
                    737:       {
                    738: /* ========================================================================== */
                    739:       /* These cases are never obeyed. This is a fudge that causes a compile-
                    740:       time error if the vectors coptable or poptable, which are indexed by
                    741:       opcode, are not the correct length. It seems to be the only way to do
                    742:       such a check at compile time, as the sizeof() operator does not work
                    743:       in the C preprocessor. */
                    744: 
                    745:       case OP_TABLE_LENGTH:
                    746:       case OP_TABLE_LENGTH +
                    747:         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
                    748:          (sizeof(poptable) == OP_TABLE_LENGTH)):
                    749:       break;
                    750: 
                    751: /* ========================================================================== */
                    752:       /* Reached a closing bracket. If not at the end of the pattern, carry
                    753:       on with the next opcode. For repeating opcodes, also add the repeat
                    754:       state. Note that KETRPOS will always be encountered at the end of the
                    755:       subpattern, because the possessive subpattern repeats are always handled
                    756:       using recursive calls. Thus, it never adds any new states.
                    757: 
                    758:       At the end of the (sub)pattern, unless we have an empty string and
                    759:       PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
                    760:       start of the subject, save the match data, shifting up all previous
                    761:       matches so we always have the longest first. */
                    762: 
                    763:       case OP_KET:
                    764:       case OP_KETRMIN:
                    765:       case OP_KETRMAX:
                    766:       case OP_KETRPOS:
                    767:       if (code != end_code)
                    768:         {
                    769:         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
                    770:         if (codevalue != OP_KET)
                    771:           {
                    772:           ADD_ACTIVE(state_offset - GET(code, 1), 0);
                    773:           }
                    774:         }
                    775:       else
                    776:         {
                    777:         if (ptr > current_subject ||
                    778:             ((md->moptions & PCRE_NOTEMPTY) == 0 &&
                    779:               ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
                    780:                 current_subject > start_subject + md->start_offset)))
                    781:           {
                    782:           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
                    783:             else if (match_count > 0 && ++match_count * 2 > offsetcount)
                    784:               match_count = 0;
                    785:           count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
                    786:           if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
                    787:           if (offsetcount >= 2)
                    788:             {
                    789:             offsets[0] = (int)(current_subject - start_subject);
                    790:             offsets[1] = (int)(ptr - start_subject);
                    791:             DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
1.1.1.3 ! misho     792:               offsets[1] - offsets[0], (char *)current_subject));
1.1       misho     793:             }
                    794:           if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
                    795:             {
                    796:             DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                    797:               "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
                    798:               match_count, rlevel*2-2, SP));
                    799:             return match_count;
                    800:             }
                    801:           }
                    802:         }
                    803:       break;
                    804: 
                    805: /* ========================================================================== */
                    806:       /* These opcodes add to the current list of states without looking
                    807:       at the current character. */
                    808: 
                    809:       /*-----------------------------------------------------------------*/
                    810:       case OP_ALT:
                    811:       do { code += GET(code, 1); } while (*code == OP_ALT);
                    812:       ADD_ACTIVE((int)(code - start_code), 0);
                    813:       break;
                    814: 
                    815:       /*-----------------------------------------------------------------*/
                    816:       case OP_BRA:
                    817:       case OP_SBRA:
                    818:       do
                    819:         {
                    820:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    821:         code += GET(code, 1);
                    822:         }
                    823:       while (*code == OP_ALT);
                    824:       break;
                    825: 
                    826:       /*-----------------------------------------------------------------*/
                    827:       case OP_CBRA:
                    828:       case OP_SCBRA:
1.1.1.2   misho     829:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
1.1       misho     830:       code += GET(code, 1);
                    831:       while (*code == OP_ALT)
                    832:         {
                    833:         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
                    834:         code += GET(code, 1);
                    835:         }
                    836:       break;
                    837: 
                    838:       /*-----------------------------------------------------------------*/
                    839:       case OP_BRAZERO:
                    840:       case OP_BRAMINZERO:
                    841:       ADD_ACTIVE(state_offset + 1, 0);
                    842:       code += 1 + GET(code, 2);
                    843:       while (*code == OP_ALT) code += GET(code, 1);
                    844:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    845:       break;
                    846: 
                    847:       /*-----------------------------------------------------------------*/
                    848:       case OP_SKIPZERO:
                    849:       code += 1 + GET(code, 2);
                    850:       while (*code == OP_ALT) code += GET(code, 1);
                    851:       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
                    852:       break;
                    853: 
                    854:       /*-----------------------------------------------------------------*/
                    855:       case OP_CIRC:
                    856:       if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
                    857:         { ADD_ACTIVE(state_offset + 1, 0); }
                    858:       break;
                    859: 
                    860:       /*-----------------------------------------------------------------*/
                    861:       case OP_CIRCM:
                    862:       if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
                    863:           (ptr != end_subject && WAS_NEWLINE(ptr)))
                    864:         { ADD_ACTIVE(state_offset + 1, 0); }
                    865:       break;
                    866: 
                    867:       /*-----------------------------------------------------------------*/
                    868:       case OP_EOD:
                    869:       if (ptr >= end_subject)
                    870:         {
                    871:         if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
                    872:           could_continue = TRUE;
                    873:         else { ADD_ACTIVE(state_offset + 1, 0); }
                    874:         }
                    875:       break;
                    876: 
                    877:       /*-----------------------------------------------------------------*/
                    878:       case OP_SOD:
                    879:       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
                    880:       break;
                    881: 
                    882:       /*-----------------------------------------------------------------*/
                    883:       case OP_SOM:
                    884:       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
                    885:       break;
                    886: 
                    887: 
                    888: /* ========================================================================== */
                    889:       /* These opcodes inspect the next subject character, and sometimes
                    890:       the previous one as well, but do not have an argument. The variable
                    891:       clen contains the length of the current character and is zero if we are
                    892:       at the end of the subject. */
                    893: 
                    894:       /*-----------------------------------------------------------------*/
                    895:       case OP_ANY:
                    896:       if (clen > 0 && !IS_NEWLINE(ptr))
1.1.1.3 ! misho     897:         {
        !           898:         if (ptr + 1 >= md->end_subject &&
        !           899:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
        !           900:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !           901:             NLBLOCK->nllen == 2 &&
        !           902:             c == NLBLOCK->nl[0])
        !           903:           {
        !           904:           could_continue = partial_newline = TRUE;
        !           905:           }
        !           906:         else
        !           907:           {
        !           908:           ADD_NEW(state_offset + 1, 0);
        !           909:           }
        !           910:         }
1.1       misho     911:       break;
                    912: 
                    913:       /*-----------------------------------------------------------------*/
                    914:       case OP_ALLANY:
                    915:       if (clen > 0)
                    916:         { ADD_NEW(state_offset + 1, 0); }
                    917:       break;
                    918: 
                    919:       /*-----------------------------------------------------------------*/
                    920:       case OP_EODN:
                    921:       if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    922:         could_continue = TRUE;
                    923:       else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
                    924:         { ADD_ACTIVE(state_offset + 1, 0); }
                    925:       break;
                    926: 
                    927:       /*-----------------------------------------------------------------*/
                    928:       case OP_DOLL:
                    929:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    930:         {
                    931:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    932:           could_continue = TRUE;
                    933:         else if (clen == 0 ||
                    934:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
                    935:                (ptr == end_subject - md->nllen)
                    936:             ))
                    937:           { ADD_ACTIVE(state_offset + 1, 0); }
1.1.1.3 ! misho     938:         else if (ptr + 1 >= md->end_subject &&
        !           939:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
        !           940:                  NLBLOCK->nltype == NLTYPE_FIXED &&
        !           941:                  NLBLOCK->nllen == 2 &&
        !           942:                  c == NLBLOCK->nl[0])
        !           943:           {
        !           944:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
        !           945:             {
        !           946:             reset_could_continue = TRUE;
        !           947:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
        !           948:             }
        !           949:           else could_continue = partial_newline = TRUE;
        !           950:           }
1.1       misho     951:         }
                    952:       break;
                    953: 
                    954:       /*-----------------------------------------------------------------*/
                    955:       case OP_DOLLM:
                    956:       if ((md->moptions & PCRE_NOTEOL) == 0)
                    957:         {
                    958:         if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
                    959:           could_continue = TRUE;
                    960:         else if (clen == 0 ||
                    961:             ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
                    962:           { ADD_ACTIVE(state_offset + 1, 0); }
1.1.1.3 ! misho     963:         else if (ptr + 1 >= md->end_subject &&
        !           964:                  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
        !           965:                  NLBLOCK->nltype == NLTYPE_FIXED &&
        !           966:                  NLBLOCK->nllen == 2 &&
        !           967:                  c == NLBLOCK->nl[0])
        !           968:           {
        !           969:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
        !           970:             {
        !           971:             reset_could_continue = TRUE;
        !           972:             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
        !           973:             }
        !           974:           else could_continue = partial_newline = TRUE;
        !           975:           }
1.1       misho     976:         }
                    977:       else if (IS_NEWLINE(ptr))
                    978:         { ADD_ACTIVE(state_offset + 1, 0); }
                    979:       break;
                    980: 
                    981:       /*-----------------------------------------------------------------*/
                    982: 
                    983:       case OP_DIGIT:
                    984:       case OP_WHITESPACE:
                    985:       case OP_WORDCHAR:
                    986:       if (clen > 0 && c < 256 &&
                    987:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
                    988:         { ADD_NEW(state_offset + 1, 0); }
                    989:       break;
                    990: 
                    991:       /*-----------------------------------------------------------------*/
                    992:       case OP_NOT_DIGIT:
                    993:       case OP_NOT_WHITESPACE:
                    994:       case OP_NOT_WORDCHAR:
                    995:       if (clen > 0 && (c >= 256 ||
                    996:             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
                    997:         { ADD_NEW(state_offset + 1, 0); }
                    998:       break;
                    999: 
                   1000:       /*-----------------------------------------------------------------*/
                   1001:       case OP_WORD_BOUNDARY:
                   1002:       case OP_NOT_WORD_BOUNDARY:
                   1003:         {
                   1004:         int left_word, right_word;
                   1005: 
                   1006:         if (ptr > start_subject)
                   1007:           {
1.1.1.2   misho    1008:           const pcre_uchar *temp = ptr - 1;
1.1       misho    1009:           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1.1.1.2   misho    1010: #ifdef SUPPORT_UTF
                   1011:           if (utf) { BACKCHAR(temp); }
1.1       misho    1012: #endif
                   1013:           GETCHARTEST(d, temp);
                   1014: #ifdef SUPPORT_UCP
                   1015:           if ((md->poptions & PCRE_UCP) != 0)
                   1016:             {
                   1017:             if (d == '_') left_word = TRUE; else
                   1018:               {
                   1019:               int cat = UCD_CATEGORY(d);
                   1020:               left_word = (cat == ucp_L || cat == ucp_N);
                   1021:               }
                   1022:             }
                   1023:           else
                   1024: #endif
                   1025:           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
                   1026:           }
                   1027:         else left_word = FALSE;
                   1028: 
                   1029:         if (clen > 0)
                   1030:           {
                   1031: #ifdef SUPPORT_UCP
                   1032:           if ((md->poptions & PCRE_UCP) != 0)
                   1033:             {
                   1034:             if (c == '_') right_word = TRUE; else
                   1035:               {
                   1036:               int cat = UCD_CATEGORY(c);
                   1037:               right_word = (cat == ucp_L || cat == ucp_N);
                   1038:               }
                   1039:             }
                   1040:           else
                   1041: #endif
                   1042:           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
                   1043:           }
                   1044:         else right_word = FALSE;
                   1045: 
                   1046:         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
                   1047:           { ADD_ACTIVE(state_offset + 1, 0); }
                   1048:         }
                   1049:       break;
                   1050: 
                   1051: 
                   1052:       /*-----------------------------------------------------------------*/
                   1053:       /* Check the next character by Unicode property. We will get here only
                   1054:       if the support is in the binary; otherwise a compile-time error occurs.
                   1055:       */
                   1056: 
                   1057: #ifdef SUPPORT_UCP
                   1058:       case OP_PROP:
                   1059:       case OP_NOTPROP:
                   1060:       if (clen > 0)
                   1061:         {
                   1062:         BOOL OK;
                   1063:         const ucd_record * prop = GET_UCD(c);
                   1064:         switch(code[1])
                   1065:           {
                   1066:           case PT_ANY:
                   1067:           OK = TRUE;
                   1068:           break;
                   1069: 
                   1070:           case PT_LAMP:
                   1071:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1072:                prop->chartype == ucp_Lt;
                   1073:           break;
                   1074: 
                   1075:           case PT_GC:
1.1.1.2   misho    1076:           OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1.1       misho    1077:           break;
                   1078: 
                   1079:           case PT_PC:
                   1080:           OK = prop->chartype == code[2];
                   1081:           break;
                   1082: 
                   1083:           case PT_SC:
                   1084:           OK = prop->script == code[2];
                   1085:           break;
                   1086: 
                   1087:           /* These are specials for combination cases. */
                   1088: 
                   1089:           case PT_ALNUM:
1.1.1.2   misho    1090:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1091:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1092:           break;
                   1093: 
                   1094:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1095:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1096:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1097:           break;
                   1098: 
                   1099:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1100:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1101:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1102:                c == CHAR_FF || c == CHAR_CR;
                   1103:           break;
                   1104: 
                   1105:           case PT_WORD:
1.1.1.2   misho    1106:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1107:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1108:                c == CHAR_UNDERSCORE;
                   1109:           break;
                   1110: 
                   1111:           /* Should never occur, but keep compilers from grumbling. */
                   1112: 
                   1113:           default:
                   1114:           OK = codevalue != OP_PROP;
                   1115:           break;
                   1116:           }
                   1117: 
                   1118:         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
                   1119:         }
                   1120:       break;
                   1121: #endif
                   1122: 
                   1123: 
                   1124: 
                   1125: /* ========================================================================== */
                   1126:       /* These opcodes likewise inspect the subject character, but have an
                   1127:       argument that is not a data character. It is one of these opcodes:
                   1128:       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
                   1129:       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
                   1130: 
                   1131:       case OP_TYPEPLUS:
                   1132:       case OP_TYPEMINPLUS:
                   1133:       case OP_TYPEPOSPLUS:
                   1134:       count = current_state->count;  /* Already matched */
                   1135:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1136:       if (clen > 0)
                   1137:         {
1.1.1.3 ! misho    1138:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
        !          1139:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
        !          1140:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          1141:             NLBLOCK->nllen == 2 &&
        !          1142:             c == NLBLOCK->nl[0])
        !          1143:           {
        !          1144:           could_continue = partial_newline = TRUE;
        !          1145:           }
        !          1146:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1147:             (c < 256 &&
                   1148:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1149:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1150:           {
                   1151:           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
                   1152:             {
                   1153:             active_count--;            /* Remove non-match possibility */
                   1154:             next_active_state--;
                   1155:             }
                   1156:           count++;
                   1157:           ADD_NEW(state_offset, count);
                   1158:           }
                   1159:         }
                   1160:       break;
                   1161: 
                   1162:       /*-----------------------------------------------------------------*/
                   1163:       case OP_TYPEQUERY:
                   1164:       case OP_TYPEMINQUERY:
                   1165:       case OP_TYPEPOSQUERY:
                   1166:       ADD_ACTIVE(state_offset + 2, 0);
                   1167:       if (clen > 0)
                   1168:         {
1.1.1.3 ! misho    1169:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
        !          1170:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
        !          1171:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          1172:             NLBLOCK->nllen == 2 &&
        !          1173:             c == NLBLOCK->nl[0])
        !          1174:           {
        !          1175:           could_continue = partial_newline = TRUE;
        !          1176:           }
        !          1177:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1178:             (c < 256 &&
                   1179:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1180:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1181:           {
                   1182:           if (codevalue == OP_TYPEPOSQUERY)
                   1183:             {
                   1184:             active_count--;            /* Remove non-match possibility */
                   1185:             next_active_state--;
                   1186:             }
                   1187:           ADD_NEW(state_offset + 2, 0);
                   1188:           }
                   1189:         }
                   1190:       break;
                   1191: 
                   1192:       /*-----------------------------------------------------------------*/
                   1193:       case OP_TYPESTAR:
                   1194:       case OP_TYPEMINSTAR:
                   1195:       case OP_TYPEPOSSTAR:
                   1196:       ADD_ACTIVE(state_offset + 2, 0);
                   1197:       if (clen > 0)
                   1198:         {
1.1.1.3 ! misho    1199:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
        !          1200:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
        !          1201:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          1202:             NLBLOCK->nllen == 2 &&
        !          1203:             c == NLBLOCK->nl[0])
        !          1204:           {
        !          1205:           could_continue = partial_newline = TRUE;
        !          1206:           }
        !          1207:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1208:             (c < 256 &&
                   1209:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1210:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1211:           {
                   1212:           if (codevalue == OP_TYPEPOSSTAR)
                   1213:             {
                   1214:             active_count--;            /* Remove non-match possibility */
                   1215:             next_active_state--;
                   1216:             }
                   1217:           ADD_NEW(state_offset, 0);
                   1218:           }
                   1219:         }
                   1220:       break;
                   1221: 
                   1222:       /*-----------------------------------------------------------------*/
                   1223:       case OP_TYPEEXACT:
                   1224:       count = current_state->count;  /* Number already matched */
                   1225:       if (clen > 0)
                   1226:         {
1.1.1.3 ! misho    1227:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
        !          1228:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
        !          1229:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          1230:             NLBLOCK->nllen == 2 &&
        !          1231:             c == NLBLOCK->nl[0])
        !          1232:           {
        !          1233:           could_continue = partial_newline = TRUE;
        !          1234:           }
        !          1235:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1236:             (c < 256 &&
                   1237:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1238:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1239:           {
                   1240:           if (++count >= GET2(code, 1))
1.1.1.2   misho    1241:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1.1       misho    1242:           else
                   1243:             { ADD_NEW(state_offset, count); }
                   1244:           }
                   1245:         }
                   1246:       break;
                   1247: 
                   1248:       /*-----------------------------------------------------------------*/
                   1249:       case OP_TYPEUPTO:
                   1250:       case OP_TYPEMINUPTO:
                   1251:       case OP_TYPEPOSUPTO:
1.1.1.2   misho    1252:       ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1.1       misho    1253:       count = current_state->count;  /* Number already matched */
                   1254:       if (clen > 0)
                   1255:         {
1.1.1.3 ! misho    1256:         if (d == OP_ANY && ptr + 1 >= md->end_subject &&
        !          1257:             (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
        !          1258:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          1259:             NLBLOCK->nllen == 2 &&
        !          1260:             c == NLBLOCK->nl[0])
        !          1261:           {
        !          1262:           could_continue = partial_newline = TRUE;
        !          1263:           }
        !          1264:         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1.1       misho    1265:             (c < 256 &&
                   1266:               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                   1267:               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
                   1268:           {
                   1269:           if (codevalue == OP_TYPEPOSUPTO)
                   1270:             {
                   1271:             active_count--;           /* Remove non-match possibility */
                   1272:             next_active_state--;
                   1273:             }
                   1274:           if (++count >= GET2(code, 1))
1.1.1.2   misho    1275:             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1276:           else
                   1277:             { ADD_NEW(state_offset, count); }
                   1278:           }
                   1279:         }
                   1280:       break;
                   1281: 
                   1282: /* ========================================================================== */
                   1283:       /* These are virtual opcodes that are used when something like
                   1284:       OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
                   1285:       argument. It keeps the code above fast for the other cases. The argument
                   1286:       is in the d variable. */
                   1287: 
                   1288: #ifdef SUPPORT_UCP
                   1289:       case OP_PROP_EXTRA + OP_TYPEPLUS:
                   1290:       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
                   1291:       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
                   1292:       count = current_state->count;           /* Already matched */
                   1293:       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
                   1294:       if (clen > 0)
                   1295:         {
                   1296:         BOOL OK;
                   1297:         const ucd_record * prop = GET_UCD(c);
                   1298:         switch(code[2])
                   1299:           {
                   1300:           case PT_ANY:
                   1301:           OK = TRUE;
                   1302:           break;
                   1303: 
                   1304:           case PT_LAMP:
                   1305:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1306:             prop->chartype == ucp_Lt;
                   1307:           break;
                   1308: 
                   1309:           case PT_GC:
1.1.1.2   misho    1310:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1.1       misho    1311:           break;
                   1312: 
                   1313:           case PT_PC:
                   1314:           OK = prop->chartype == code[3];
                   1315:           break;
                   1316: 
                   1317:           case PT_SC:
                   1318:           OK = prop->script == code[3];
                   1319:           break;
                   1320: 
                   1321:           /* These are specials for combination cases. */
                   1322: 
                   1323:           case PT_ALNUM:
1.1.1.2   misho    1324:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1325:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1326:           break;
                   1327: 
                   1328:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1329:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1330:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1331:           break;
                   1332: 
                   1333:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1334:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1335:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1336:                c == CHAR_FF || c == CHAR_CR;
                   1337:           break;
                   1338: 
                   1339:           case PT_WORD:
1.1.1.2   misho    1340:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1341:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1342:                c == CHAR_UNDERSCORE;
                   1343:           break;
                   1344: 
                   1345:           /* Should never occur, but keep compilers from grumbling. */
                   1346: 
                   1347:           default:
                   1348:           OK = codevalue != OP_PROP;
                   1349:           break;
                   1350:           }
                   1351: 
                   1352:         if (OK == (d == OP_PROP))
                   1353:           {
                   1354:           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
                   1355:             {
                   1356:             active_count--;           /* Remove non-match possibility */
                   1357:             next_active_state--;
                   1358:             }
                   1359:           count++;
                   1360:           ADD_NEW(state_offset, count);
                   1361:           }
                   1362:         }
                   1363:       break;
                   1364: 
                   1365:       /*-----------------------------------------------------------------*/
                   1366:       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
                   1367:       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
                   1368:       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
                   1369:       count = current_state->count;  /* Already matched */
                   1370:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1371:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   1372:         {
1.1.1.2   misho    1373:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1374:         int ncount = 0;
                   1375:         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
                   1376:           {
                   1377:           active_count--;           /* Remove non-match possibility */
                   1378:           next_active_state--;
                   1379:           }
                   1380:         while (nptr < end_subject)
                   1381:           {
                   1382:           int nd;
                   1383:           int ndlen = 1;
                   1384:           GETCHARLEN(nd, nptr, ndlen);
                   1385:           if (UCD_CATEGORY(nd) != ucp_M) break;
                   1386:           ncount++;
                   1387:           nptr += ndlen;
                   1388:           }
                   1389:         count++;
                   1390:         ADD_NEW_DATA(-state_offset, count, ncount);
                   1391:         }
                   1392:       break;
                   1393: #endif
                   1394: 
                   1395:       /*-----------------------------------------------------------------*/
                   1396:       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
                   1397:       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
                   1398:       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
                   1399:       count = current_state->count;  /* Already matched */
                   1400:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1401:       if (clen > 0)
                   1402:         {
                   1403:         int ncount = 0;
                   1404:         switch (c)
                   1405:           {
                   1406:           case 0x000b:
                   1407:           case 0x000c:
                   1408:           case 0x0085:
                   1409:           case 0x2028:
                   1410:           case 0x2029:
                   1411:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1412:           goto ANYNL01;
                   1413: 
                   1414:           case 0x000d:
                   1415:           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
                   1416:           /* Fall through */
                   1417: 
                   1418:           ANYNL01:
                   1419:           case 0x000a:
                   1420:           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
                   1421:             {
                   1422:             active_count--;           /* Remove non-match possibility */
                   1423:             next_active_state--;
                   1424:             }
                   1425:           count++;
                   1426:           ADD_NEW_DATA(-state_offset, count, ncount);
                   1427:           break;
                   1428: 
                   1429:           default:
                   1430:           break;
                   1431:           }
                   1432:         }
                   1433:       break;
                   1434: 
                   1435:       /*-----------------------------------------------------------------*/
                   1436:       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
                   1437:       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
                   1438:       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1439:       count = current_state->count;  /* Already matched */
                   1440:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1441:       if (clen > 0)
                   1442:         {
                   1443:         BOOL OK;
                   1444:         switch (c)
                   1445:           {
                   1446:           case 0x000a:
                   1447:           case 0x000b:
                   1448:           case 0x000c:
                   1449:           case 0x000d:
                   1450:           case 0x0085:
                   1451:           case 0x2028:
                   1452:           case 0x2029:
                   1453:           OK = TRUE;
                   1454:           break;
                   1455: 
                   1456:           default:
                   1457:           OK = FALSE;
                   1458:           break;
                   1459:           }
                   1460: 
                   1461:         if (OK == (d == OP_VSPACE))
                   1462:           {
                   1463:           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1464:             {
                   1465:             active_count--;           /* Remove non-match possibility */
                   1466:             next_active_state--;
                   1467:             }
                   1468:           count++;
                   1469:           ADD_NEW_DATA(-state_offset, count, 0);
                   1470:           }
                   1471:         }
                   1472:       break;
                   1473: 
                   1474:       /*-----------------------------------------------------------------*/
                   1475:       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
                   1476:       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
                   1477:       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
                   1478:       count = current_state->count;  /* Already matched */
                   1479:       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
                   1480:       if (clen > 0)
                   1481:         {
                   1482:         BOOL OK;
                   1483:         switch (c)
                   1484:           {
                   1485:           case 0x09:      /* HT */
                   1486:           case 0x20:      /* SPACE */
                   1487:           case 0xa0:      /* NBSP */
                   1488:           case 0x1680:    /* OGHAM SPACE MARK */
                   1489:           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1490:           case 0x2000:    /* EN QUAD */
                   1491:           case 0x2001:    /* EM QUAD */
                   1492:           case 0x2002:    /* EN SPACE */
                   1493:           case 0x2003:    /* EM SPACE */
                   1494:           case 0x2004:    /* THREE-PER-EM SPACE */
                   1495:           case 0x2005:    /* FOUR-PER-EM SPACE */
                   1496:           case 0x2006:    /* SIX-PER-EM SPACE */
                   1497:           case 0x2007:    /* FIGURE SPACE */
                   1498:           case 0x2008:    /* PUNCTUATION SPACE */
                   1499:           case 0x2009:    /* THIN SPACE */
                   1500:           case 0x200A:    /* HAIR SPACE */
                   1501:           case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1502:           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1503:           case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1504:           OK = TRUE;
                   1505:           break;
                   1506: 
                   1507:           default:
                   1508:           OK = FALSE;
                   1509:           break;
                   1510:           }
                   1511: 
                   1512:         if (OK == (d == OP_HSPACE))
                   1513:           {
                   1514:           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
                   1515:             {
                   1516:             active_count--;           /* Remove non-match possibility */
                   1517:             next_active_state--;
                   1518:             }
                   1519:           count++;
                   1520:           ADD_NEW_DATA(-state_offset, count, 0);
                   1521:           }
                   1522:         }
                   1523:       break;
                   1524: 
                   1525:       /*-----------------------------------------------------------------*/
                   1526: #ifdef SUPPORT_UCP
                   1527:       case OP_PROP_EXTRA + OP_TYPEQUERY:
                   1528:       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
                   1529:       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
                   1530:       count = 4;
                   1531:       goto QS1;
                   1532: 
                   1533:       case OP_PROP_EXTRA + OP_TYPESTAR:
                   1534:       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
                   1535:       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
                   1536:       count = 0;
                   1537: 
                   1538:       QS1:
                   1539: 
                   1540:       ADD_ACTIVE(state_offset + 4, 0);
                   1541:       if (clen > 0)
                   1542:         {
                   1543:         BOOL OK;
                   1544:         const ucd_record * prop = GET_UCD(c);
                   1545:         switch(code[2])
                   1546:           {
                   1547:           case PT_ANY:
                   1548:           OK = TRUE;
                   1549:           break;
                   1550: 
                   1551:           case PT_LAMP:
                   1552:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1553:             prop->chartype == ucp_Lt;
                   1554:           break;
                   1555: 
                   1556:           case PT_GC:
1.1.1.2   misho    1557:           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1.1       misho    1558:           break;
                   1559: 
                   1560:           case PT_PC:
                   1561:           OK = prop->chartype == code[3];
                   1562:           break;
                   1563: 
                   1564:           case PT_SC:
                   1565:           OK = prop->script == code[3];
                   1566:           break;
                   1567: 
                   1568:           /* These are specials for combination cases. */
                   1569: 
                   1570:           case PT_ALNUM:
1.1.1.2   misho    1571:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1572:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1573:           break;
                   1574: 
                   1575:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1576:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1577:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1578:           break;
                   1579: 
                   1580:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1581:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1582:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1583:                c == CHAR_FF || c == CHAR_CR;
                   1584:           break;
                   1585: 
                   1586:           case PT_WORD:
1.1.1.2   misho    1587:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1588:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1589:                c == CHAR_UNDERSCORE;
                   1590:           break;
                   1591: 
                   1592:           /* Should never occur, but keep compilers from grumbling. */
                   1593: 
                   1594:           default:
                   1595:           OK = codevalue != OP_PROP;
                   1596:           break;
                   1597:           }
                   1598: 
                   1599:         if (OK == (d == OP_PROP))
                   1600:           {
                   1601:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
                   1602:               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
                   1603:             {
                   1604:             active_count--;           /* Remove non-match possibility */
                   1605:             next_active_state--;
                   1606:             }
                   1607:           ADD_NEW(state_offset + count, 0);
                   1608:           }
                   1609:         }
                   1610:       break;
                   1611: 
                   1612:       /*-----------------------------------------------------------------*/
                   1613:       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
                   1614:       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
                   1615:       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
                   1616:       count = 2;
                   1617:       goto QS2;
                   1618: 
                   1619:       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
                   1620:       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
                   1621:       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
                   1622:       count = 0;
                   1623: 
                   1624:       QS2:
                   1625: 
                   1626:       ADD_ACTIVE(state_offset + 2, 0);
                   1627:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   1628:         {
1.1.1.2   misho    1629:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1630:         int ncount = 0;
                   1631:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
                   1632:             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
                   1633:           {
                   1634:           active_count--;           /* Remove non-match possibility */
                   1635:           next_active_state--;
                   1636:           }
                   1637:         while (nptr < end_subject)
                   1638:           {
                   1639:           int nd;
                   1640:           int ndlen = 1;
                   1641:           GETCHARLEN(nd, nptr, ndlen);
                   1642:           if (UCD_CATEGORY(nd) != ucp_M) break;
                   1643:           ncount++;
                   1644:           nptr += ndlen;
                   1645:           }
                   1646:         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
                   1647:         }
                   1648:       break;
                   1649: #endif
                   1650: 
                   1651:       /*-----------------------------------------------------------------*/
                   1652:       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
                   1653:       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
                   1654:       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
                   1655:       count = 2;
                   1656:       goto QS3;
                   1657: 
                   1658:       case OP_ANYNL_EXTRA + OP_TYPESTAR:
                   1659:       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
                   1660:       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
                   1661:       count = 0;
                   1662: 
                   1663:       QS3:
                   1664:       ADD_ACTIVE(state_offset + 2, 0);
                   1665:       if (clen > 0)
                   1666:         {
                   1667:         int ncount = 0;
                   1668:         switch (c)
                   1669:           {
                   1670:           case 0x000b:
                   1671:           case 0x000c:
                   1672:           case 0x0085:
                   1673:           case 0x2028:
                   1674:           case 0x2029:
                   1675:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1676:           goto ANYNL02;
                   1677: 
                   1678:           case 0x000d:
                   1679:           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
                   1680:           /* Fall through */
                   1681: 
                   1682:           ANYNL02:
                   1683:           case 0x000a:
                   1684:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
                   1685:               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
                   1686:             {
                   1687:             active_count--;           /* Remove non-match possibility */
                   1688:             next_active_state--;
                   1689:             }
                   1690:           ADD_NEW_DATA(-(state_offset + count), 0, ncount);
                   1691:           break;
                   1692: 
                   1693:           default:
                   1694:           break;
                   1695:           }
                   1696:         }
                   1697:       break;
                   1698: 
                   1699:       /*-----------------------------------------------------------------*/
                   1700:       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
                   1701:       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
                   1702:       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1703:       count = 2;
                   1704:       goto QS4;
                   1705: 
                   1706:       case OP_VSPACE_EXTRA + OP_TYPESTAR:
                   1707:       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
                   1708:       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1709:       count = 0;
                   1710: 
                   1711:       QS4:
                   1712:       ADD_ACTIVE(state_offset + 2, 0);
                   1713:       if (clen > 0)
                   1714:         {
                   1715:         BOOL OK;
                   1716:         switch (c)
                   1717:           {
                   1718:           case 0x000a:
                   1719:           case 0x000b:
                   1720:           case 0x000c:
                   1721:           case 0x000d:
                   1722:           case 0x0085:
                   1723:           case 0x2028:
                   1724:           case 0x2029:
                   1725:           OK = TRUE;
                   1726:           break;
                   1727: 
                   1728:           default:
                   1729:           OK = FALSE;
                   1730:           break;
                   1731:           }
                   1732:         if (OK == (d == OP_VSPACE))
                   1733:           {
                   1734:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1735:               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1736:             {
                   1737:             active_count--;           /* Remove non-match possibility */
                   1738:             next_active_state--;
                   1739:             }
                   1740:           ADD_NEW_DATA(-(state_offset + count), 0, 0);
                   1741:           }
                   1742:         }
                   1743:       break;
                   1744: 
                   1745:       /*-----------------------------------------------------------------*/
                   1746:       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
                   1747:       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
                   1748:       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
                   1749:       count = 2;
                   1750:       goto QS5;
                   1751: 
                   1752:       case OP_HSPACE_EXTRA + OP_TYPESTAR:
                   1753:       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
                   1754:       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
                   1755:       count = 0;
                   1756: 
                   1757:       QS5:
                   1758:       ADD_ACTIVE(state_offset + 2, 0);
                   1759:       if (clen > 0)
                   1760:         {
                   1761:         BOOL OK;
                   1762:         switch (c)
                   1763:           {
                   1764:           case 0x09:      /* HT */
                   1765:           case 0x20:      /* SPACE */
                   1766:           case 0xa0:      /* NBSP */
                   1767:           case 0x1680:    /* OGHAM SPACE MARK */
                   1768:           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1769:           case 0x2000:    /* EN QUAD */
                   1770:           case 0x2001:    /* EM QUAD */
                   1771:           case 0x2002:    /* EN SPACE */
                   1772:           case 0x2003:    /* EM SPACE */
                   1773:           case 0x2004:    /* THREE-PER-EM SPACE */
                   1774:           case 0x2005:    /* FOUR-PER-EM SPACE */
                   1775:           case 0x2006:    /* SIX-PER-EM SPACE */
                   1776:           case 0x2007:    /* FIGURE SPACE */
                   1777:           case 0x2008:    /* PUNCTUATION SPACE */
                   1778:           case 0x2009:    /* THIN SPACE */
                   1779:           case 0x200A:    /* HAIR SPACE */
                   1780:           case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1781:           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1782:           case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1783:           OK = TRUE;
                   1784:           break;
                   1785: 
                   1786:           default:
                   1787:           OK = FALSE;
                   1788:           break;
                   1789:           }
                   1790: 
                   1791:         if (OK == (d == OP_HSPACE))
                   1792:           {
                   1793:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
                   1794:               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
                   1795:             {
                   1796:             active_count--;           /* Remove non-match possibility */
                   1797:             next_active_state--;
                   1798:             }
                   1799:           ADD_NEW_DATA(-(state_offset + count), 0, 0);
                   1800:           }
                   1801:         }
                   1802:       break;
                   1803: 
                   1804:       /*-----------------------------------------------------------------*/
                   1805: #ifdef SUPPORT_UCP
                   1806:       case OP_PROP_EXTRA + OP_TYPEEXACT:
                   1807:       case OP_PROP_EXTRA + OP_TYPEUPTO:
                   1808:       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
                   1809:       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
                   1810:       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1811:         { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1.1       misho    1812:       count = current_state->count;  /* Number already matched */
                   1813:       if (clen > 0)
                   1814:         {
                   1815:         BOOL OK;
                   1816:         const ucd_record * prop = GET_UCD(c);
1.1.1.2   misho    1817:         switch(code[1 + IMM2_SIZE + 1])
1.1       misho    1818:           {
                   1819:           case PT_ANY:
                   1820:           OK = TRUE;
                   1821:           break;
                   1822: 
                   1823:           case PT_LAMP:
                   1824:           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
                   1825:             prop->chartype == ucp_Lt;
                   1826:           break;
                   1827: 
                   1828:           case PT_GC:
1.1.1.2   misho    1829:           OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1.1       misho    1830:           break;
                   1831: 
                   1832:           case PT_PC:
1.1.1.2   misho    1833:           OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1.1       misho    1834:           break;
                   1835: 
                   1836:           case PT_SC:
1.1.1.2   misho    1837:           OK = prop->script == code[1 + IMM2_SIZE + 2];
1.1       misho    1838:           break;
                   1839: 
                   1840:           /* These are specials for combination cases. */
                   1841: 
                   1842:           case PT_ALNUM:
1.1.1.2   misho    1843:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1844:                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1.1       misho    1845:           break;
                   1846: 
                   1847:           case PT_SPACE:    /* Perl space */
1.1.1.2   misho    1848:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1849:                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
                   1850:           break;
                   1851: 
                   1852:           case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    1853:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    1854:                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   1855:                c == CHAR_FF || c == CHAR_CR;
                   1856:           break;
                   1857: 
                   1858:           case PT_WORD:
1.1.1.2   misho    1859:           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   1860:                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    1861:                c == CHAR_UNDERSCORE;
                   1862:           break;
                   1863: 
                   1864:           /* Should never occur, but keep compilers from grumbling. */
                   1865: 
                   1866:           default:
                   1867:           OK = codevalue != OP_PROP;
                   1868:           break;
                   1869:           }
                   1870: 
                   1871:         if (OK == (d == OP_PROP))
                   1872:           {
                   1873:           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
                   1874:             {
                   1875:             active_count--;           /* Remove non-match possibility */
                   1876:             next_active_state--;
                   1877:             }
                   1878:           if (++count >= GET2(code, 1))
1.1.1.2   misho    1879:             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1.1       misho    1880:           else
                   1881:             { ADD_NEW(state_offset, count); }
                   1882:           }
                   1883:         }
                   1884:       break;
                   1885: 
                   1886:       /*-----------------------------------------------------------------*/
                   1887:       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
                   1888:       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
                   1889:       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
                   1890:       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
                   1891:       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1892:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1893:       count = current_state->count;  /* Number already matched */
                   1894:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   1895:         {
1.1.1.2   misho    1896:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    1897:         int ncount = 0;
                   1898:         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
                   1899:           {
                   1900:           active_count--;           /* Remove non-match possibility */
                   1901:           next_active_state--;
                   1902:           }
                   1903:         while (nptr < end_subject)
                   1904:           {
                   1905:           int nd;
                   1906:           int ndlen = 1;
                   1907:           GETCHARLEN(nd, nptr, ndlen);
                   1908:           if (UCD_CATEGORY(nd) != ucp_M) break;
                   1909:           ncount++;
                   1910:           nptr += ndlen;
                   1911:           }
1.1.1.3 ! misho    1912:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
        !          1913:             reset_could_continue = TRUE;
1.1       misho    1914:         if (++count >= GET2(code, 1))
1.1.1.2   misho    1915:           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1.1       misho    1916:         else
                   1917:           { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1918:         }
                   1919:       break;
                   1920: #endif
                   1921: 
                   1922:       /*-----------------------------------------------------------------*/
                   1923:       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
                   1924:       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
                   1925:       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
                   1926:       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
                   1927:       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1928:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1929:       count = current_state->count;  /* Number already matched */
                   1930:       if (clen > 0)
                   1931:         {
                   1932:         int ncount = 0;
                   1933:         switch (c)
                   1934:           {
                   1935:           case 0x000b:
                   1936:           case 0x000c:
                   1937:           case 0x0085:
                   1938:           case 0x2028:
                   1939:           case 0x2029:
                   1940:           if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   1941:           goto ANYNL03;
                   1942: 
                   1943:           case 0x000d:
                   1944:           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
                   1945:           /* Fall through */
                   1946: 
                   1947:           ANYNL03:
                   1948:           case 0x000a:
                   1949:           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
                   1950:             {
                   1951:             active_count--;           /* Remove non-match possibility */
                   1952:             next_active_state--;
                   1953:             }
                   1954:           if (++count >= GET2(code, 1))
1.1.1.2   misho    1955:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1.1       misho    1956:           else
                   1957:             { ADD_NEW_DATA(-state_offset, count, ncount); }
                   1958:           break;
                   1959: 
                   1960:           default:
                   1961:           break;
                   1962:           }
                   1963:         }
                   1964:       break;
                   1965: 
                   1966:       /*-----------------------------------------------------------------*/
                   1967:       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
                   1968:       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
                   1969:       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
                   1970:       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
                   1971:       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    1972:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    1973:       count = current_state->count;  /* Number already matched */
                   1974:       if (clen > 0)
                   1975:         {
                   1976:         BOOL OK;
                   1977:         switch (c)
                   1978:           {
                   1979:           case 0x000a:
                   1980:           case 0x000b:
                   1981:           case 0x000c:
                   1982:           case 0x000d:
                   1983:           case 0x0085:
                   1984:           case 0x2028:
                   1985:           case 0x2029:
                   1986:           OK = TRUE;
                   1987:           break;
                   1988: 
                   1989:           default:
                   1990:           OK = FALSE;
                   1991:           }
                   1992: 
                   1993:         if (OK == (d == OP_VSPACE))
                   1994:           {
                   1995:           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
                   1996:             {
                   1997:             active_count--;           /* Remove non-match possibility */
                   1998:             next_active_state--;
                   1999:             }
                   2000:           if (++count >= GET2(code, 1))
1.1.1.2   misho    2001:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1.1       misho    2002:           else
                   2003:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   2004:           }
                   2005:         }
                   2006:       break;
                   2007: 
                   2008:       /*-----------------------------------------------------------------*/
                   2009:       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
                   2010:       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
                   2011:       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
                   2012:       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
                   2013:       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1.1.1.2   misho    2014:         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1.1       misho    2015:       count = current_state->count;  /* Number already matched */
                   2016:       if (clen > 0)
                   2017:         {
                   2018:         BOOL OK;
                   2019:         switch (c)
                   2020:           {
                   2021:           case 0x09:      /* HT */
                   2022:           case 0x20:      /* SPACE */
                   2023:           case 0xa0:      /* NBSP */
                   2024:           case 0x1680:    /* OGHAM SPACE MARK */
                   2025:           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2026:           case 0x2000:    /* EN QUAD */
                   2027:           case 0x2001:    /* EM QUAD */
                   2028:           case 0x2002:    /* EN SPACE */
                   2029:           case 0x2003:    /* EM SPACE */
                   2030:           case 0x2004:    /* THREE-PER-EM SPACE */
                   2031:           case 0x2005:    /* FOUR-PER-EM SPACE */
                   2032:           case 0x2006:    /* SIX-PER-EM SPACE */
                   2033:           case 0x2007:    /* FIGURE SPACE */
                   2034:           case 0x2008:    /* PUNCTUATION SPACE */
                   2035:           case 0x2009:    /* THIN SPACE */
                   2036:           case 0x200A:    /* HAIR SPACE */
                   2037:           case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2038:           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2039:           case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2040:           OK = TRUE;
                   2041:           break;
                   2042: 
                   2043:           default:
                   2044:           OK = FALSE;
                   2045:           break;
                   2046:           }
                   2047: 
                   2048:         if (OK == (d == OP_HSPACE))
                   2049:           {
                   2050:           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
                   2051:             {
                   2052:             active_count--;           /* Remove non-match possibility */
                   2053:             next_active_state--;
                   2054:             }
                   2055:           if (++count >= GET2(code, 1))
1.1.1.2   misho    2056:             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
1.1       misho    2057:           else
                   2058:             { ADD_NEW_DATA(-state_offset, count, 0); }
                   2059:           }
                   2060:         }
                   2061:       break;
                   2062: 
                   2063: /* ========================================================================== */
                   2064:       /* These opcodes are followed by a character that is usually compared
                   2065:       to the current subject character; it is loaded into d. We still get
                   2066:       here even if there is no subject character, because in some cases zero
                   2067:       repetitions are permitted. */
                   2068: 
                   2069:       /*-----------------------------------------------------------------*/
                   2070:       case OP_CHAR:
                   2071:       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2072:       break;
                   2073: 
                   2074:       /*-----------------------------------------------------------------*/
                   2075:       case OP_CHARI:
                   2076:       if (clen == 0) break;
                   2077: 
1.1.1.2   misho    2078: #ifdef SUPPORT_UTF
                   2079:       if (utf)
1.1       misho    2080:         {
                   2081:         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
                   2082:           {
                   2083:           unsigned int othercase;
1.1.1.2   misho    2084:           if (c < 128)
                   2085:             othercase = fcc[c];
                   2086:           else
                   2087:             /* If we have Unicode property support, we can use it to test the
                   2088:             other case of the character. */
1.1       misho    2089: #ifdef SUPPORT_UCP
1.1.1.2   misho    2090:             othercase = UCD_OTHERCASE(c);
1.1       misho    2091: #else
1.1.1.2   misho    2092:             othercase = NOTACHAR;
1.1       misho    2093: #endif
                   2094: 
                   2095:           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2096:           }
                   2097:         }
                   2098:       else
1.1.1.2   misho    2099: #endif  /* SUPPORT_UTF */
                   2100:       /* Not UTF mode */
1.1       misho    2101:         {
1.1.1.2   misho    2102:         if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
                   2103:           { ADD_NEW(state_offset + 2, 0); }
1.1       misho    2104:         }
                   2105:       break;
                   2106: 
                   2107: 
                   2108: #ifdef SUPPORT_UCP
                   2109:       /*-----------------------------------------------------------------*/
                   2110:       /* This is a tricky one because it can match more than one character.
                   2111:       Find out how many characters to skip, and then set up a negative state
                   2112:       to wait for them to pass before continuing. */
                   2113: 
                   2114:       case OP_EXTUNI:
                   2115:       if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
                   2116:         {
1.1.1.2   misho    2117:         const pcre_uchar *nptr = ptr + clen;
1.1       misho    2118:         int ncount = 0;
                   2119:         while (nptr < end_subject)
                   2120:           {
                   2121:           int nclen = 1;
                   2122:           GETCHARLEN(c, nptr, nclen);
                   2123:           if (UCD_CATEGORY(c) != ucp_M) break;
                   2124:           ncount++;
                   2125:           nptr += nclen;
                   2126:           }
1.1.1.3 ! misho    2127:         if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
        !          2128:             reset_could_continue = TRUE;
1.1       misho    2129:         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
                   2130:         }
                   2131:       break;
                   2132: #endif
                   2133: 
                   2134:       /*-----------------------------------------------------------------*/
                   2135:       /* This is tricky like EXTUNI because it too can match more than one
                   2136:       character (when CR is followed by LF). In this case, set up a negative
                   2137:       state to wait for one character to pass before continuing. */
                   2138: 
                   2139:       case OP_ANYNL:
                   2140:       if (clen > 0) switch(c)
                   2141:         {
                   2142:         case 0x000b:
                   2143:         case 0x000c:
                   2144:         case 0x0085:
                   2145:         case 0x2028:
                   2146:         case 0x2029:
                   2147:         if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
                   2148: 
                   2149:         case 0x000a:
                   2150:         ADD_NEW(state_offset + 1, 0);
                   2151:         break;
                   2152: 
                   2153:         case 0x000d:
1.1.1.3 ! misho    2154:         if (ptr + 1 >= end_subject)
        !          2155:           {
        !          2156:           ADD_NEW(state_offset + 1, 0);
        !          2157:           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
        !          2158:             reset_could_continue = TRUE;
        !          2159:           }
        !          2160:         else if (ptr[1] == 0x0a)
1.1       misho    2161:           {
                   2162:           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
                   2163:           }
                   2164:         else
                   2165:           {
                   2166:           ADD_NEW(state_offset + 1, 0);
                   2167:           }
                   2168:         break;
                   2169:         }
                   2170:       break;
                   2171: 
                   2172:       /*-----------------------------------------------------------------*/
                   2173:       case OP_NOT_VSPACE:
                   2174:       if (clen > 0) switch(c)
                   2175:         {
                   2176:         case 0x000a:
                   2177:         case 0x000b:
                   2178:         case 0x000c:
                   2179:         case 0x000d:
                   2180:         case 0x0085:
                   2181:         case 0x2028:
                   2182:         case 0x2029:
                   2183:         break;
                   2184: 
                   2185:         default:
                   2186:         ADD_NEW(state_offset + 1, 0);
                   2187:         break;
                   2188:         }
                   2189:       break;
                   2190: 
                   2191:       /*-----------------------------------------------------------------*/
                   2192:       case OP_VSPACE:
                   2193:       if (clen > 0) switch(c)
                   2194:         {
                   2195:         case 0x000a:
                   2196:         case 0x000b:
                   2197:         case 0x000c:
                   2198:         case 0x000d:
                   2199:         case 0x0085:
                   2200:         case 0x2028:
                   2201:         case 0x2029:
                   2202:         ADD_NEW(state_offset + 1, 0);
                   2203:         break;
                   2204: 
                   2205:         default: break;
                   2206:         }
                   2207:       break;
                   2208: 
                   2209:       /*-----------------------------------------------------------------*/
                   2210:       case OP_NOT_HSPACE:
                   2211:       if (clen > 0) switch(c)
                   2212:         {
                   2213:         case 0x09:      /* HT */
                   2214:         case 0x20:      /* SPACE */
                   2215:         case 0xa0:      /* NBSP */
                   2216:         case 0x1680:    /* OGHAM SPACE MARK */
                   2217:         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2218:         case 0x2000:    /* EN QUAD */
                   2219:         case 0x2001:    /* EM QUAD */
                   2220:         case 0x2002:    /* EN SPACE */
                   2221:         case 0x2003:    /* EM SPACE */
                   2222:         case 0x2004:    /* THREE-PER-EM SPACE */
                   2223:         case 0x2005:    /* FOUR-PER-EM SPACE */
                   2224:         case 0x2006:    /* SIX-PER-EM SPACE */
                   2225:         case 0x2007:    /* FIGURE SPACE */
                   2226:         case 0x2008:    /* PUNCTUATION SPACE */
                   2227:         case 0x2009:    /* THIN SPACE */
                   2228:         case 0x200A:    /* HAIR SPACE */
                   2229:         case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2230:         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2231:         case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2232:         break;
                   2233: 
                   2234:         default:
                   2235:         ADD_NEW(state_offset + 1, 0);
                   2236:         break;
                   2237:         }
                   2238:       break;
                   2239: 
                   2240:       /*-----------------------------------------------------------------*/
                   2241:       case OP_HSPACE:
                   2242:       if (clen > 0) switch(c)
                   2243:         {
                   2244:         case 0x09:      /* HT */
                   2245:         case 0x20:      /* SPACE */
                   2246:         case 0xa0:      /* NBSP */
                   2247:         case 0x1680:    /* OGHAM SPACE MARK */
                   2248:         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2249:         case 0x2000:    /* EN QUAD */
                   2250:         case 0x2001:    /* EM QUAD */
                   2251:         case 0x2002:    /* EN SPACE */
                   2252:         case 0x2003:    /* EM SPACE */
                   2253:         case 0x2004:    /* THREE-PER-EM SPACE */
                   2254:         case 0x2005:    /* FOUR-PER-EM SPACE */
                   2255:         case 0x2006:    /* SIX-PER-EM SPACE */
                   2256:         case 0x2007:    /* FIGURE SPACE */
                   2257:         case 0x2008:    /* PUNCTUATION SPACE */
                   2258:         case 0x2009:    /* THIN SPACE */
                   2259:         case 0x200A:    /* HAIR SPACE */
                   2260:         case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2261:         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2262:         case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2263:         ADD_NEW(state_offset + 1, 0);
                   2264:         break;
                   2265:         }
                   2266:       break;
                   2267: 
                   2268:       /*-----------------------------------------------------------------*/
1.1.1.3 ! misho    2269:       /* Match a negated single character casefully. */
1.1       misho    2270: 
                   2271:       case OP_NOT:
                   2272:       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
                   2273:       break;
                   2274: 
                   2275:       /*-----------------------------------------------------------------*/
1.1.1.3 ! misho    2276:       /* Match a negated single character caselessly. */
1.1       misho    2277: 
                   2278:       case OP_NOTI:
1.1.1.3 ! misho    2279:       if (clen > 0)
        !          2280:         {
        !          2281:         unsigned int otherd;
        !          2282: #ifdef SUPPORT_UTF
        !          2283:         if (utf && d >= 128)
        !          2284:           {
        !          2285: #ifdef SUPPORT_UCP
        !          2286:           otherd = UCD_OTHERCASE(d);
        !          2287: #endif  /* SUPPORT_UCP */
        !          2288:           }
        !          2289:         else
        !          2290: #endif  /* SUPPORT_UTF */
        !          2291:         otherd = TABLE_GET(d, fcc, d);
        !          2292:         if (c != d && c != otherd)
        !          2293:           { ADD_NEW(state_offset + dlen + 1, 0); }
        !          2294:         }
1.1       misho    2295:       break;
                   2296: 
                   2297:       /*-----------------------------------------------------------------*/
                   2298:       case OP_PLUSI:
                   2299:       case OP_MINPLUSI:
                   2300:       case OP_POSPLUSI:
                   2301:       case OP_NOTPLUSI:
                   2302:       case OP_NOTMINPLUSI:
                   2303:       case OP_NOTPOSPLUSI:
                   2304:       caseless = TRUE;
                   2305:       codevalue -= OP_STARI - OP_STAR;
                   2306: 
                   2307:       /* Fall through */
                   2308:       case OP_PLUS:
                   2309:       case OP_MINPLUS:
                   2310:       case OP_POSPLUS:
                   2311:       case OP_NOTPLUS:
                   2312:       case OP_NOTMINPLUS:
                   2313:       case OP_NOTPOSPLUS:
                   2314:       count = current_state->count;  /* Already matched */
                   2315:       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
                   2316:       if (clen > 0)
                   2317:         {
                   2318:         unsigned int otherd = NOTACHAR;
                   2319:         if (caseless)
                   2320:           {
1.1.1.2   misho    2321: #ifdef SUPPORT_UTF
                   2322:           if (utf && d >= 128)
1.1       misho    2323:             {
                   2324: #ifdef SUPPORT_UCP
                   2325:             otherd = UCD_OTHERCASE(d);
                   2326: #endif  /* SUPPORT_UCP */
                   2327:             }
                   2328:           else
1.1.1.2   misho    2329: #endif  /* SUPPORT_UTF */
                   2330:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2331:           }
                   2332:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2333:           {
                   2334:           if (count > 0 &&
                   2335:               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
                   2336:             {
                   2337:             active_count--;             /* Remove non-match possibility */
                   2338:             next_active_state--;
                   2339:             }
                   2340:           count++;
                   2341:           ADD_NEW(state_offset, count);
                   2342:           }
                   2343:         }
                   2344:       break;
                   2345: 
                   2346:       /*-----------------------------------------------------------------*/
                   2347:       case OP_QUERYI:
                   2348:       case OP_MINQUERYI:
                   2349:       case OP_POSQUERYI:
                   2350:       case OP_NOTQUERYI:
                   2351:       case OP_NOTMINQUERYI:
                   2352:       case OP_NOTPOSQUERYI:
                   2353:       caseless = TRUE;
                   2354:       codevalue -= OP_STARI - OP_STAR;
                   2355:       /* Fall through */
                   2356:       case OP_QUERY:
                   2357:       case OP_MINQUERY:
                   2358:       case OP_POSQUERY:
                   2359:       case OP_NOTQUERY:
                   2360:       case OP_NOTMINQUERY:
                   2361:       case OP_NOTPOSQUERY:
                   2362:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2363:       if (clen > 0)
                   2364:         {
                   2365:         unsigned int otherd = NOTACHAR;
                   2366:         if (caseless)
                   2367:           {
1.1.1.2   misho    2368: #ifdef SUPPORT_UTF
                   2369:           if (utf && d >= 128)
1.1       misho    2370:             {
                   2371: #ifdef SUPPORT_UCP
                   2372:             otherd = UCD_OTHERCASE(d);
                   2373: #endif  /* SUPPORT_UCP */
                   2374:             }
                   2375:           else
1.1.1.2   misho    2376: #endif  /* SUPPORT_UTF */
                   2377:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2378:           }
                   2379:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2380:           {
                   2381:           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
                   2382:             {
                   2383:             active_count--;            /* Remove non-match possibility */
                   2384:             next_active_state--;
                   2385:             }
                   2386:           ADD_NEW(state_offset + dlen + 1, 0);
                   2387:           }
                   2388:         }
                   2389:       break;
                   2390: 
                   2391:       /*-----------------------------------------------------------------*/
                   2392:       case OP_STARI:
                   2393:       case OP_MINSTARI:
                   2394:       case OP_POSSTARI:
                   2395:       case OP_NOTSTARI:
                   2396:       case OP_NOTMINSTARI:
                   2397:       case OP_NOTPOSSTARI:
                   2398:       caseless = TRUE;
                   2399:       codevalue -= OP_STARI - OP_STAR;
                   2400:       /* Fall through */
                   2401:       case OP_STAR:
                   2402:       case OP_MINSTAR:
                   2403:       case OP_POSSTAR:
                   2404:       case OP_NOTSTAR:
                   2405:       case OP_NOTMINSTAR:
                   2406:       case OP_NOTPOSSTAR:
                   2407:       ADD_ACTIVE(state_offset + dlen + 1, 0);
                   2408:       if (clen > 0)
                   2409:         {
                   2410:         unsigned int otherd = NOTACHAR;
                   2411:         if (caseless)
                   2412:           {
1.1.1.2   misho    2413: #ifdef SUPPORT_UTF
                   2414:           if (utf && d >= 128)
1.1       misho    2415:             {
                   2416: #ifdef SUPPORT_UCP
                   2417:             otherd = UCD_OTHERCASE(d);
                   2418: #endif  /* SUPPORT_UCP */
                   2419:             }
                   2420:           else
1.1.1.2   misho    2421: #endif  /* SUPPORT_UTF */
                   2422:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2423:           }
                   2424:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2425:           {
                   2426:           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
                   2427:             {
                   2428:             active_count--;            /* Remove non-match possibility */
                   2429:             next_active_state--;
                   2430:             }
                   2431:           ADD_NEW(state_offset, 0);
                   2432:           }
                   2433:         }
                   2434:       break;
                   2435: 
                   2436:       /*-----------------------------------------------------------------*/
                   2437:       case OP_EXACTI:
                   2438:       case OP_NOTEXACTI:
                   2439:       caseless = TRUE;
                   2440:       codevalue -= OP_STARI - OP_STAR;
                   2441:       /* Fall through */
                   2442:       case OP_EXACT:
                   2443:       case OP_NOTEXACT:
                   2444:       count = current_state->count;  /* Number already matched */
                   2445:       if (clen > 0)
                   2446:         {
                   2447:         unsigned int otherd = NOTACHAR;
                   2448:         if (caseless)
                   2449:           {
1.1.1.2   misho    2450: #ifdef SUPPORT_UTF
                   2451:           if (utf && d >= 128)
1.1       misho    2452:             {
                   2453: #ifdef SUPPORT_UCP
                   2454:             otherd = UCD_OTHERCASE(d);
                   2455: #endif  /* SUPPORT_UCP */
                   2456:             }
                   2457:           else
1.1.1.2   misho    2458: #endif  /* SUPPORT_UTF */
                   2459:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2460:           }
                   2461:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2462:           {
                   2463:           if (++count >= GET2(code, 1))
1.1.1.2   misho    2464:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
1.1       misho    2465:           else
                   2466:             { ADD_NEW(state_offset, count); }
                   2467:           }
                   2468:         }
                   2469:       break;
                   2470: 
                   2471:       /*-----------------------------------------------------------------*/
                   2472:       case OP_UPTOI:
                   2473:       case OP_MINUPTOI:
                   2474:       case OP_POSUPTOI:
                   2475:       case OP_NOTUPTOI:
                   2476:       case OP_NOTMINUPTOI:
                   2477:       case OP_NOTPOSUPTOI:
                   2478:       caseless = TRUE;
                   2479:       codevalue -= OP_STARI - OP_STAR;
                   2480:       /* Fall through */
                   2481:       case OP_UPTO:
                   2482:       case OP_MINUPTO:
                   2483:       case OP_POSUPTO:
                   2484:       case OP_NOTUPTO:
                   2485:       case OP_NOTMINUPTO:
                   2486:       case OP_NOTPOSUPTO:
1.1.1.2   misho    2487:       ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
1.1       misho    2488:       count = current_state->count;  /* Number already matched */
                   2489:       if (clen > 0)
                   2490:         {
                   2491:         unsigned int otherd = NOTACHAR;
                   2492:         if (caseless)
                   2493:           {
1.1.1.2   misho    2494: #ifdef SUPPORT_UTF
                   2495:           if (utf && d >= 128)
1.1       misho    2496:             {
                   2497: #ifdef SUPPORT_UCP
                   2498:             otherd = UCD_OTHERCASE(d);
                   2499: #endif  /* SUPPORT_UCP */
                   2500:             }
                   2501:           else
1.1.1.2   misho    2502: #endif  /* SUPPORT_UTF */
                   2503:           otherd = TABLE_GET(d, fcc, d);
1.1       misho    2504:           }
                   2505:         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
                   2506:           {
                   2507:           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
                   2508:             {
                   2509:             active_count--;             /* Remove non-match possibility */
                   2510:             next_active_state--;
                   2511:             }
                   2512:           if (++count >= GET2(code, 1))
1.1.1.2   misho    2513:             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
1.1       misho    2514:           else
                   2515:             { ADD_NEW(state_offset, count); }
                   2516:           }
                   2517:         }
                   2518:       break;
                   2519: 
                   2520: 
                   2521: /* ========================================================================== */
                   2522:       /* These are the class-handling opcodes */
                   2523: 
                   2524:       case OP_CLASS:
                   2525:       case OP_NCLASS:
                   2526:       case OP_XCLASS:
                   2527:         {
                   2528:         BOOL isinclass = FALSE;
                   2529:         int next_state_offset;
1.1.1.2   misho    2530:         const pcre_uchar *ecode;
1.1       misho    2531: 
                   2532:         /* For a simple class, there is always just a 32-byte table, and we
                   2533:         can set isinclass from it. */
                   2534: 
                   2535:         if (codevalue != OP_XCLASS)
                   2536:           {
1.1.1.2   misho    2537:           ecode = code + 1 + (32 / sizeof(pcre_uchar));
1.1       misho    2538:           if (clen > 0)
                   2539:             {
                   2540:             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
1.1.1.2   misho    2541:               ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
1.1       misho    2542:             }
                   2543:           }
                   2544: 
                   2545:         /* An extended class may have a table or a list of single characters,
                   2546:         ranges, or both, and it may be positive or negative. There's a
                   2547:         function that sorts all this out. */
                   2548: 
                   2549:         else
                   2550:          {
                   2551:          ecode = code + GET(code, 1);
1.1.1.2   misho    2552:          if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
1.1       misho    2553:          }
                   2554: 
                   2555:         /* At this point, isinclass is set for all kinds of class, and ecode
                   2556:         points to the byte after the end of the class. If there is a
                   2557:         quantifier, this is where it will be. */
                   2558: 
                   2559:         next_state_offset = (int)(ecode - start_code);
                   2560: 
                   2561:         switch (*ecode)
                   2562:           {
                   2563:           case OP_CRSTAR:
                   2564:           case OP_CRMINSTAR:
                   2565:           ADD_ACTIVE(next_state_offset + 1, 0);
                   2566:           if (isinclass) { ADD_NEW(state_offset, 0); }
                   2567:           break;
                   2568: 
                   2569:           case OP_CRPLUS:
                   2570:           case OP_CRMINPLUS:
                   2571:           count = current_state->count;  /* Already matched */
                   2572:           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
                   2573:           if (isinclass) { count++; ADD_NEW(state_offset, count); }
                   2574:           break;
                   2575: 
                   2576:           case OP_CRQUERY:
                   2577:           case OP_CRMINQUERY:
                   2578:           ADD_ACTIVE(next_state_offset + 1, 0);
                   2579:           if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
                   2580:           break;
                   2581: 
                   2582:           case OP_CRRANGE:
                   2583:           case OP_CRMINRANGE:
                   2584:           count = current_state->count;  /* Already matched */
                   2585:           if (count >= GET2(ecode, 1))
1.1.1.2   misho    2586:             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
1.1       misho    2587:           if (isinclass)
                   2588:             {
1.1.1.2   misho    2589:             int max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2590:             if (++count >= max && max != 0)   /* Max 0 => no limit */
1.1.1.2   misho    2591:               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
1.1       misho    2592:             else
                   2593:               { ADD_NEW(state_offset, count); }
                   2594:             }
                   2595:           break;
                   2596: 
                   2597:           default:
                   2598:           if (isinclass) { ADD_NEW(next_state_offset, 0); }
                   2599:           break;
                   2600:           }
                   2601:         }
                   2602:       break;
                   2603: 
                   2604: /* ========================================================================== */
                   2605:       /* These are the opcodes for fancy brackets of various kinds. We have
                   2606:       to use recursion in order to handle them. The "always failing" assertion
                   2607:       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
                   2608:       though the other "backtracking verbs" are not supported. */
                   2609: 
                   2610:       case OP_FAIL:
                   2611:       forced_fail++;    /* Count FAILs for multiple states */
                   2612:       break;
                   2613: 
                   2614:       case OP_ASSERT:
                   2615:       case OP_ASSERT_NOT:
                   2616:       case OP_ASSERTBACK:
                   2617:       case OP_ASSERTBACK_NOT:
                   2618:         {
                   2619:         int rc;
                   2620:         int local_offsets[2];
                   2621:         int local_workspace[1000];
1.1.1.2   misho    2622:         const pcre_uchar *endasscode = code + GET(code, 1);
1.1       misho    2623: 
                   2624:         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2625: 
                   2626:         rc = internal_dfa_exec(
                   2627:           md,                                   /* static match data */
                   2628:           code,                                 /* this subexpression's code */
                   2629:           ptr,                                  /* where we currently are */
                   2630:           (int)(ptr - start_subject),           /* start offset */
                   2631:           local_offsets,                        /* offset vector */
                   2632:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2633:           local_workspace,                      /* workspace vector */
                   2634:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2635:           rlevel);                              /* function recursion level */
                   2636: 
                   2637:         if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2638:         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
                   2639:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2640:         }
                   2641:       break;
                   2642: 
                   2643:       /*-----------------------------------------------------------------*/
                   2644:       case OP_COND:
                   2645:       case OP_SCOND:
                   2646:         {
                   2647:         int local_offsets[1000];
                   2648:         int local_workspace[1000];
                   2649:         int codelink = GET(code, 1);
                   2650:         int condcode;
                   2651: 
                   2652:         /* Because of the way auto-callout works during compile, a callout item
                   2653:         is inserted between OP_COND and an assertion condition. This does not
                   2654:         happen for the other conditions. */
                   2655: 
                   2656:         if (code[LINK_SIZE+1] == OP_CALLOUT)
                   2657:           {
                   2658:           rrc = 0;
1.1.1.2   misho    2659:           if (PUBL(callout) != NULL)
1.1       misho    2660:             {
1.1.1.2   misho    2661:             PUBL(callout_block) cb;
1.1       misho    2662:             cb.version          = 1;   /* Version 1 of the callout block */
                   2663:             cb.callout_number   = code[LINK_SIZE+2];
                   2664:             cb.offset_vector    = offsets;
1.1.1.2   misho    2665: #ifdef COMPILE_PCRE8
1.1       misho    2666:             cb.subject          = (PCRE_SPTR)start_subject;
1.1.1.2   misho    2667: #else
                   2668:             cb.subject          = (PCRE_SPTR16)start_subject;
                   2669: #endif
1.1       misho    2670:             cb.subject_length   = (int)(end_subject - start_subject);
                   2671:             cb.start_match      = (int)(current_subject - start_subject);
                   2672:             cb.current_position = (int)(ptr - start_subject);
                   2673:             cb.pattern_position = GET(code, LINK_SIZE + 3);
                   2674:             cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
                   2675:             cb.capture_top      = 1;
                   2676:             cb.capture_last     = -1;
                   2677:             cb.callout_data     = md->callout_data;
                   2678:             cb.mark             = NULL;   /* No (*MARK) support */
1.1.1.2   misho    2679:             if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
1.1       misho    2680:             }
                   2681:           if (rrc > 0) break;                      /* Fail this thread */
1.1.1.2   misho    2682:           code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
1.1       misho    2683:           }
                   2684: 
                   2685:         condcode = code[LINK_SIZE+1];
                   2686: 
                   2687:         /* Back reference conditions are not supported */
                   2688: 
                   2689:         if (condcode == OP_CREF || condcode == OP_NCREF)
                   2690:           return PCRE_ERROR_DFA_UCOND;
                   2691: 
                   2692:         /* The DEFINE condition is always false */
                   2693: 
                   2694:         if (condcode == OP_DEF)
                   2695:           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2696: 
                   2697:         /* The only supported version of OP_RREF is for the value RREF_ANY,
                   2698:         which means "test if in any recursion". We can't test for specifically
                   2699:         recursed groups. */
                   2700: 
                   2701:         else if (condcode == OP_RREF || condcode == OP_NRREF)
                   2702:           {
1.1.1.2   misho    2703:           int value = GET2(code, LINK_SIZE + 2);
1.1       misho    2704:           if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
                   2705:           if (md->recursive != NULL)
1.1.1.2   misho    2706:             { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
1.1       misho    2707:           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2708:           }
                   2709: 
                   2710:         /* Otherwise, the condition is an assertion */
                   2711: 
                   2712:         else
                   2713:           {
                   2714:           int rc;
1.1.1.2   misho    2715:           const pcre_uchar *asscode = code + LINK_SIZE + 1;
                   2716:           const pcre_uchar *endasscode = asscode + GET(asscode, 1);
1.1       misho    2717: 
                   2718:           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
                   2719: 
                   2720:           rc = internal_dfa_exec(
                   2721:             md,                                   /* fixed match data */
                   2722:             asscode,                              /* this subexpression's code */
                   2723:             ptr,                                  /* where we currently are */
                   2724:             (int)(ptr - start_subject),           /* start offset */
                   2725:             local_offsets,                        /* offset vector */
                   2726:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2727:             local_workspace,                      /* workspace vector */
                   2728:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2729:             rlevel);                              /* function recursion level */
                   2730: 
                   2731:           if (rc == PCRE_ERROR_DFA_UITEM) return rc;
                   2732:           if ((rc >= 0) ==
                   2733:                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
                   2734:             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
                   2735:           else
                   2736:             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
                   2737:           }
                   2738:         }
                   2739:       break;
                   2740: 
                   2741:       /*-----------------------------------------------------------------*/
                   2742:       case OP_RECURSE:
                   2743:         {
                   2744:         dfa_recursion_info *ri;
                   2745:         int local_offsets[1000];
                   2746:         int local_workspace[1000];
1.1.1.2   misho    2747:         const pcre_uchar *callpat = start_code + GET(code, 1);
1.1       misho    2748:         int recno = (callpat == md->start_code)? 0 :
                   2749:           GET2(callpat, 1 + LINK_SIZE);
                   2750:         int rc;
                   2751: 
                   2752:         DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
                   2753: 
                   2754:         /* Check for repeating a recursion without advancing the subject
                   2755:         pointer. This should catch convoluted mutual recursions. (Some simple
                   2756:         cases are caught at compile time.) */
                   2757: 
                   2758:         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   2759:           if (recno == ri->group_num && ptr == ri->subject_position)
                   2760:             return PCRE_ERROR_RECURSELOOP;
                   2761: 
                   2762:         /* Remember this recursion and where we started it so as to
                   2763:         catch infinite loops. */
                   2764: 
                   2765:         new_recursive.group_num = recno;
                   2766:         new_recursive.subject_position = ptr;
                   2767:         new_recursive.prevrec = md->recursive;
                   2768:         md->recursive = &new_recursive;
                   2769: 
                   2770:         rc = internal_dfa_exec(
                   2771:           md,                                   /* fixed match data */
                   2772:           callpat,                              /* this subexpression's code */
                   2773:           ptr,                                  /* where we currently are */
                   2774:           (int)(ptr - start_subject),           /* start offset */
                   2775:           local_offsets,                        /* offset vector */
                   2776:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2777:           local_workspace,                      /* workspace vector */
                   2778:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2779:           rlevel);                              /* function recursion level */
                   2780: 
                   2781:         md->recursive = new_recursive.prevrec;  /* Done this recursion */
                   2782: 
                   2783:         DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
                   2784:           rc));
                   2785: 
                   2786:         /* Ran out of internal offsets */
                   2787: 
                   2788:         if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
                   2789: 
                   2790:         /* For each successful matched substring, set up the next state with a
                   2791:         count of characters to skip before trying it. Note that the count is in
                   2792:         characters, not bytes. */
                   2793: 
                   2794:         if (rc > 0)
                   2795:           {
                   2796:           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
                   2797:             {
                   2798:             int charcount = local_offsets[rc+1] - local_offsets[rc];
1.1.1.2   misho    2799: #ifdef SUPPORT_UTF
1.1.1.3 ! misho    2800:             if (utf)
        !          2801:               {
        !          2802:               const pcre_uchar *p = start_subject + local_offsets[rc];
        !          2803:               const pcre_uchar *pp = start_subject + local_offsets[rc+1];
        !          2804:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
        !          2805:               }
1.1.1.2   misho    2806: #endif
1.1       misho    2807:             if (charcount > 0)
                   2808:               {
                   2809:               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
                   2810:               }
                   2811:             else
                   2812:               {
                   2813:               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
                   2814:               }
                   2815:             }
                   2816:           }
                   2817:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2818:         }
                   2819:       break;
                   2820: 
                   2821:       /*-----------------------------------------------------------------*/
                   2822:       case OP_BRAPOS:
                   2823:       case OP_SBRAPOS:
                   2824:       case OP_CBRAPOS:
                   2825:       case OP_SCBRAPOS:
                   2826:       case OP_BRAPOSZERO:
                   2827:         {
                   2828:         int charcount, matched_count;
1.1.1.2   misho    2829:         const pcre_uchar *local_ptr = ptr;
1.1       misho    2830:         BOOL allow_zero;
                   2831: 
                   2832:         if (codevalue == OP_BRAPOSZERO)
                   2833:           {
                   2834:           allow_zero = TRUE;
                   2835:           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
                   2836:           }
                   2837:         else allow_zero = FALSE;
                   2838: 
                   2839:         /* Loop to match the subpattern as many times as possible as if it were
                   2840:         a complete pattern. */
                   2841: 
                   2842:         for (matched_count = 0;; matched_count++)
                   2843:           {
                   2844:           int local_offsets[2];
                   2845:           int local_workspace[1000];
                   2846: 
                   2847:           int rc = internal_dfa_exec(
                   2848:             md,                                   /* fixed match data */
                   2849:             code,                                 /* this subexpression's code */
                   2850:             local_ptr,                            /* where we currently are */
                   2851:             (int)(ptr - start_subject),           /* start offset */
                   2852:             local_offsets,                        /* offset vector */
                   2853:             sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2854:             local_workspace,                      /* workspace vector */
                   2855:             sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2856:             rlevel);                              /* function recursion level */
                   2857: 
                   2858:           /* Failed to match */
                   2859: 
                   2860:           if (rc < 0)
                   2861:             {
                   2862:             if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2863:             break;
                   2864:             }
                   2865: 
                   2866:           /* Matched: break the loop if zero characters matched. */
                   2867: 
                   2868:           charcount = local_offsets[1] - local_offsets[0];
                   2869:           if (charcount == 0) break;
                   2870:           local_ptr += charcount;    /* Advance temporary position ptr */
                   2871:           }
                   2872: 
                   2873:         /* At this point we have matched the subpattern matched_count
                   2874:         times, and local_ptr is pointing to the character after the end of the
                   2875:         last match. */
                   2876: 
                   2877:         if (matched_count > 0 || allow_zero)
                   2878:           {
1.1.1.2   misho    2879:           const pcre_uchar *end_subpattern = code;
1.1       misho    2880:           int next_state_offset;
                   2881: 
                   2882:           do { end_subpattern += GET(end_subpattern, 1); }
                   2883:             while (*end_subpattern == OP_ALT);
                   2884:           next_state_offset =
                   2885:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2886: 
                   2887:           /* Optimization: if there are no more active states, and there
                   2888:           are no new states yet set up, then skip over the subject string
                   2889:           right here, to save looping. Otherwise, set up the new state to swing
                   2890:           into action when the end of the matched substring is reached. */
                   2891: 
                   2892:           if (i + 1 >= active_count && new_count == 0)
                   2893:             {
                   2894:             ptr = local_ptr;
                   2895:             clen = 0;
                   2896:             ADD_NEW(next_state_offset, 0);
                   2897:             }
                   2898:           else
                   2899:             {
1.1.1.2   misho    2900:             const pcre_uchar *p = ptr;
                   2901:             const pcre_uchar *pp = local_ptr;
1.1       misho    2902:             charcount = (int)(pp - p);
1.1.1.2   misho    2903: #ifdef SUPPORT_UTF
1.1.1.3 ! misho    2904:             if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
1.1.1.2   misho    2905: #endif
1.1       misho    2906:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2907:             }
                   2908:           }
                   2909:         }
                   2910:       break;
                   2911: 
                   2912:       /*-----------------------------------------------------------------*/
                   2913:       case OP_ONCE:
                   2914:       case OP_ONCE_NC:
                   2915:         {
                   2916:         int local_offsets[2];
                   2917:         int local_workspace[1000];
                   2918: 
                   2919:         int rc = internal_dfa_exec(
                   2920:           md,                                   /* fixed match data */
                   2921:           code,                                 /* this subexpression's code */
                   2922:           ptr,                                  /* where we currently are */
                   2923:           (int)(ptr - start_subject),           /* start offset */
                   2924:           local_offsets,                        /* offset vector */
                   2925:           sizeof(local_offsets)/sizeof(int),    /* size of same */
                   2926:           local_workspace,                      /* workspace vector */
                   2927:           sizeof(local_workspace)/sizeof(int),  /* size of same */
                   2928:           rlevel);                              /* function recursion level */
                   2929: 
                   2930:         if (rc >= 0)
                   2931:           {
1.1.1.2   misho    2932:           const pcre_uchar *end_subpattern = code;
1.1       misho    2933:           int charcount = local_offsets[1] - local_offsets[0];
                   2934:           int next_state_offset, repeat_state_offset;
                   2935: 
                   2936:           do { end_subpattern += GET(end_subpattern, 1); }
                   2937:             while (*end_subpattern == OP_ALT);
                   2938:           next_state_offset =
                   2939:             (int)(end_subpattern - start_code + LINK_SIZE + 1);
                   2940: 
                   2941:           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
                   2942:           arrange for the repeat state also to be added to the relevant list.
                   2943:           Calculate the offset, or set -1 for no repeat. */
                   2944: 
                   2945:           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
                   2946:                                  *end_subpattern == OP_KETRMIN)?
                   2947:             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
                   2948: 
                   2949:           /* If we have matched an empty string, add the next state at the
                   2950:           current character pointer. This is important so that the duplicate
                   2951:           checking kicks in, which is what breaks infinite loops that match an
                   2952:           empty string. */
                   2953: 
                   2954:           if (charcount == 0)
                   2955:             {
                   2956:             ADD_ACTIVE(next_state_offset, 0);
                   2957:             }
                   2958: 
                   2959:           /* Optimization: if there are no more active states, and there
                   2960:           are no new states yet set up, then skip over the subject string
                   2961:           right here, to save looping. Otherwise, set up the new state to swing
                   2962:           into action when the end of the matched substring is reached. */
                   2963: 
                   2964:           else if (i + 1 >= active_count && new_count == 0)
                   2965:             {
                   2966:             ptr += charcount;
                   2967:             clen = 0;
                   2968:             ADD_NEW(next_state_offset, 0);
                   2969: 
                   2970:             /* If we are adding a repeat state at the new character position,
                   2971:             we must fudge things so that it is the only current state.
                   2972:             Otherwise, it might be a duplicate of one we processed before, and
                   2973:             that would cause it to be skipped. */
                   2974: 
                   2975:             if (repeat_state_offset >= 0)
                   2976:               {
                   2977:               next_active_state = active_states;
                   2978:               active_count = 0;
                   2979:               i = -1;
                   2980:               ADD_ACTIVE(repeat_state_offset, 0);
                   2981:               }
                   2982:             }
                   2983:           else
                   2984:             {
1.1.1.2   misho    2985: #ifdef SUPPORT_UTF
1.1.1.3 ! misho    2986:             if (utf)
        !          2987:               {
        !          2988:               const pcre_uchar *p = start_subject + local_offsets[0];
        !          2989:               const pcre_uchar *pp = start_subject + local_offsets[1];
        !          2990:               while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
        !          2991:               }
1.1.1.2   misho    2992: #endif
1.1       misho    2993:             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
                   2994:             if (repeat_state_offset >= 0)
                   2995:               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
                   2996:             }
                   2997:           }
                   2998:         else if (rc != PCRE_ERROR_NOMATCH) return rc;
                   2999:         }
                   3000:       break;
                   3001: 
                   3002: 
                   3003: /* ========================================================================== */
                   3004:       /* Handle callouts */
                   3005: 
                   3006:       case OP_CALLOUT:
                   3007:       rrc = 0;
1.1.1.2   misho    3008:       if (PUBL(callout) != NULL)
1.1       misho    3009:         {
1.1.1.2   misho    3010:         PUBL(callout_block) cb;
1.1       misho    3011:         cb.version          = 1;   /* Version 1 of the callout block */
                   3012:         cb.callout_number   = code[1];
                   3013:         cb.offset_vector    = offsets;
1.1.1.2   misho    3014: #ifdef COMPILE_PCRE8
1.1       misho    3015:         cb.subject          = (PCRE_SPTR)start_subject;
1.1.1.2   misho    3016: #else
                   3017:         cb.subject          = (PCRE_SPTR16)start_subject;
                   3018: #endif
1.1       misho    3019:         cb.subject_length   = (int)(end_subject - start_subject);
                   3020:         cb.start_match      = (int)(current_subject - start_subject);
                   3021:         cb.current_position = (int)(ptr - start_subject);
                   3022:         cb.pattern_position = GET(code, 2);
                   3023:         cb.next_item_length = GET(code, 2 + LINK_SIZE);
                   3024:         cb.capture_top      = 1;
                   3025:         cb.capture_last     = -1;
                   3026:         cb.callout_data     = md->callout_data;
                   3027:         cb.mark             = NULL;   /* No (*MARK) support */
1.1.1.2   misho    3028:         if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
1.1       misho    3029:         }
                   3030:       if (rrc == 0)
1.1.1.2   misho    3031:         { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
1.1       misho    3032:       break;
                   3033: 
                   3034: 
                   3035: /* ========================================================================== */
                   3036:       default:        /* Unsupported opcode */
                   3037:       return PCRE_ERROR_DFA_UITEM;
                   3038:       }
                   3039: 
                   3040:     NEXT_ACTIVE_STATE: continue;
                   3041: 
                   3042:     }      /* End of loop scanning active states */
                   3043: 
                   3044:   /* We have finished the processing at the current subject character. If no
                   3045:   new states have been set for the next character, we have found all the
                   3046:   matches that we are going to find. If we are at the top level and partial
                   3047:   matching has been requested, check for appropriate conditions.
                   3048: 
                   3049:   The "forced_fail" variable counts the number of (*F) encountered for the
                   3050:   character. If it is equal to the original active_count (saved in
                   3051:   workspace[1]) it means that (*F) was found on every active state. In this
                   3052:   case we don't want to give a partial match.
                   3053: 
                   3054:   The "could_continue" variable is true if a state could have continued but
                   3055:   for the fact that the end of the subject was reached. */
                   3056: 
                   3057:   if (new_count <= 0)
                   3058:     {
                   3059:     if (rlevel == 1 &&                               /* Top level, and */
1.1.1.3 ! misho    3060:         could_continue &&                            /* Some could go on, and */
1.1       misho    3061:         forced_fail != workspace[1] &&               /* Not all forced fail & */
                   3062:         (                                            /* either... */
                   3063:         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
                   3064:         ||                                           /* or... */
                   3065:         ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
                   3066:          match_count < 0)                            /* no matches */
                   3067:         ) &&                                         /* And... */
1.1.1.3 ! misho    3068:         (
        !          3069:         partial_newline ||                           /* Either partial NL */
        !          3070:           (                                          /* or ... */
        !          3071:           ptr >= end_subject &&                /* End of subject and */
        !          3072:           ptr > md->start_used_ptr)            /* Inspected non-empty string */
        !          3073:           )
        !          3074:         )
1.1       misho    3075:       {
                   3076:       if (offsetcount >= 2)
                   3077:         {
                   3078:         offsets[0] = (int)(md->start_used_ptr - start_subject);
                   3079:         offsets[1] = (int)(end_subject - start_subject);
                   3080:         }
                   3081:       match_count = PCRE_ERROR_PARTIAL;
                   3082:       }
                   3083: 
                   3084:     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
                   3085:       "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
                   3086:       rlevel*2-2, SP));
                   3087:     break;        /* In effect, "return", but see the comment below */
                   3088:     }
                   3089: 
                   3090:   /* One or more states are active for the next character. */
                   3091: 
                   3092:   ptr += clen;    /* Advance to next subject character */
                   3093:   }               /* Loop to move along the subject string */
                   3094: 
                   3095: /* Control gets here from "break" a few lines above. We do it this way because
                   3096: if we use "return" above, we have compiler trouble. Some compilers warn if
                   3097: there's nothing here because they think the function doesn't return a value. On
                   3098: the other hand, if we put a dummy statement here, some more clever compilers
                   3099: complain that it can't be reached. Sigh. */
                   3100: 
                   3101: return match_count;
                   3102: }
                   3103: 
                   3104: 
                   3105: 
                   3106: 
                   3107: /*************************************************
                   3108: *    Execute a Regular Expression - DFA engine   *
                   3109: *************************************************/
                   3110: 
                   3111: /* This external function applies a compiled re to a subject string using a DFA
                   3112: engine. This function calls the internal function multiple times if the pattern
                   3113: is not anchored.
                   3114: 
                   3115: Arguments:
                   3116:   argument_re     points to the compiled expression
                   3117:   extra_data      points to extra data or is NULL
                   3118:   subject         points to the subject string
                   3119:   length          length of subject string (may contain binary zeros)
                   3120:   start_offset    where to start in the subject string
                   3121:   options         option bits
                   3122:   offsets         vector of match offsets
                   3123:   offsetcount     size of same
                   3124:   workspace       workspace vector
                   3125:   wscount         size of same
                   3126: 
                   3127: Returns:          > 0 => number of match offset pairs placed in offsets
                   3128:                   = 0 => offsets overflowed; longest matches are present
                   3129:                    -1 => failed to match
                   3130:                  < -1 => some kind of unexpected problem
                   3131: */
                   3132: 
1.1.1.2   misho    3133: #ifdef COMPILE_PCRE8
1.1       misho    3134: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3135: pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   3136:   const char *subject, int length, int start_offset, int options, int *offsets,
                   3137:   int offsetcount, int *workspace, int wscount)
1.1.1.2   misho    3138: #else
                   3139: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   3140: pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
                   3141:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
                   3142:   int offsetcount, int *workspace, int wscount)
                   3143: #endif
1.1       misho    3144: {
1.1.1.2   misho    3145: REAL_PCRE *re = (REAL_PCRE *)argument_re;
1.1       misho    3146: dfa_match_data match_block;
                   3147: dfa_match_data *md = &match_block;
1.1.1.2   misho    3148: BOOL utf, anchored, startline, firstline;
                   3149: const pcre_uchar *current_subject, *end_subject;
1.1       misho    3150: const pcre_study_data *study = NULL;
                   3151: 
1.1.1.2   misho    3152: const pcre_uchar *req_char_ptr;
                   3153: const pcre_uint8 *start_bits = NULL;
                   3154: BOOL has_first_char = FALSE;
                   3155: BOOL has_req_char = FALSE;
                   3156: pcre_uchar first_char = 0;
                   3157: pcre_uchar first_char2 = 0;
                   3158: pcre_uchar req_char = 0;
                   3159: pcre_uchar req_char2 = 0;
1.1       misho    3160: int newline;
                   3161: 
                   3162: /* Plausibility checks */
                   3163: 
                   3164: if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   3165: if (re == NULL || subject == NULL || workspace == NULL ||
                   3166:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   3167: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   3168: if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
                   3169: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   3170: 
1.1.1.3 ! misho    3171: /* Check that the first field in the block is the magic number. If it is not,
        !          3172: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
        !          3173: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
        !          3174: means that the pattern is likely compiled with different endianness. */
        !          3175: 
        !          3176: if (re->magic_number != MAGIC_NUMBER)
        !          3177:   return re->magic_number == REVERSED_MAGIC_NUMBER?
        !          3178:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
        !          3179: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
        !          3180: 
        !          3181: /* If restarting after a partial match, do some sanity checks on the contents
        !          3182: of the workspace. */
        !          3183: 
        !          3184: if ((options & PCRE_DFA_RESTART) != 0)
        !          3185:   {
        !          3186:   if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
        !          3187:     workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
        !          3188:       return PCRE_ERROR_DFA_BADRESTART;
        !          3189:   }
        !          3190: 
        !          3191: /* Set up study, callout, and table data */
1.1       misho    3192: 
                   3193: md->tables = re->tables;
                   3194: md->callout_data = NULL;
                   3195: 
                   3196: if (extra_data != NULL)
                   3197:   {
                   3198:   unsigned int flags = extra_data->flags;
                   3199:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   3200:     study = (const pcre_study_data *)extra_data->study_data;
                   3201:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
                   3202:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   3203:     return PCRE_ERROR_DFA_UMLIMIT;
                   3204:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   3205:     md->callout_data = extra_data->callout_data;
                   3206:   if ((flags & PCRE_EXTRA_TABLES) != 0)
                   3207:     md->tables = extra_data->tables;
                   3208:   }
                   3209: 
                   3210: /* Set some local values */
                   3211: 
1.1.1.2   misho    3212: current_subject = (const pcre_uchar *)subject + start_offset;
                   3213: end_subject = (const pcre_uchar *)subject + length;
                   3214: req_char_ptr = current_subject - 1;
                   3215: 
                   3216: #ifdef SUPPORT_UTF
                   3217: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
                   3218: utf = (re->options & PCRE_UTF8) != 0;
1.1       misho    3219: #else
1.1.1.2   misho    3220: utf = FALSE;
1.1       misho    3221: #endif
                   3222: 
                   3223: anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
                   3224:   (re->options & PCRE_ANCHORED) != 0;
                   3225: 
                   3226: /* The remaining fixed data for passing around. */
                   3227: 
1.1.1.2   misho    3228: md->start_code = (const pcre_uchar *)argument_re +
1.1       misho    3229:     re->name_table_offset + re->name_count * re->name_entry_size;
1.1.1.2   misho    3230: md->start_subject = (const pcre_uchar *)subject;
1.1       misho    3231: md->end_subject = end_subject;
                   3232: md->start_offset = start_offset;
                   3233: md->moptions = options;
                   3234: md->poptions = re->options;
                   3235: 
                   3236: /* If the BSR option is not set at match time, copy what was set
                   3237: at compile time. */
                   3238: 
                   3239: if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
                   3240:   {
                   3241:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   3242:     md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
                   3243: #ifdef BSR_ANYCRLF
                   3244:   else md->moptions |= PCRE_BSR_ANYCRLF;
                   3245: #endif
                   3246:   }
                   3247: 
                   3248: /* Handle different types of newline. The three bits give eight cases. If
                   3249: nothing is set at run time, whatever was used at compile time applies. */
                   3250: 
                   3251: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
                   3252:          PCRE_NEWLINE_BITS)
                   3253:   {
                   3254:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   3255:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   3256:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   3257:   case PCRE_NEWLINE_CR+
                   3258:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   3259:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   3260:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   3261:   default: return PCRE_ERROR_BADNEWLINE;
                   3262:   }
                   3263: 
                   3264: if (newline == -2)
                   3265:   {
                   3266:   md->nltype = NLTYPE_ANYCRLF;
                   3267:   }
                   3268: else if (newline < 0)
                   3269:   {
                   3270:   md->nltype = NLTYPE_ANY;
                   3271:   }
                   3272: else
                   3273:   {
                   3274:   md->nltype = NLTYPE_FIXED;
                   3275:   if (newline > 255)
                   3276:     {
                   3277:     md->nllen = 2;
                   3278:     md->nl[0] = (newline >> 8) & 255;
                   3279:     md->nl[1] = newline & 255;
                   3280:     }
                   3281:   else
                   3282:     {
                   3283:     md->nllen = 1;
                   3284:     md->nl[0] = newline;
                   3285:     }
                   3286:   }
                   3287: 
                   3288: /* Check a UTF-8 string if required. Pass back the character offset and error
                   3289: code for an invalid string if the offsets vector has room for them. */
                   3290: 
1.1.1.2   misho    3291: #ifdef SUPPORT_UTF
                   3292: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
1.1       misho    3293:   {
                   3294:   int erroroffset;
1.1.1.2   misho    3295:   int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
1.1       misho    3296:   if (errorcode != 0)
                   3297:     {
                   3298:     if (offsetcount >= 2)
                   3299:       {
                   3300:       offsets[0] = erroroffset;
                   3301:       offsets[1] = errorcode;
                   3302:       }
                   3303:     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
                   3304:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
                   3305:     }
                   3306:   if (start_offset > 0 && start_offset < length &&
1.1.1.2   misho    3307:         NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
1.1       misho    3308:     return PCRE_ERROR_BADUTF8_OFFSET;
                   3309:   }
                   3310: #endif
                   3311: 
                   3312: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   3313: is a feature that makes it possible to save compiled regex and re-use them
                   3314: in other programs later. */
                   3315: 
1.1.1.2   misho    3316: if (md->tables == NULL) md->tables = PRIV(default_tables);
1.1       misho    3317: 
1.1.1.2   misho    3318: /* The "must be at the start of a line" flags are used in a loop when finding
                   3319: where to start. */
1.1       misho    3320: 
                   3321: startline = (re->flags & PCRE_STARTLINE) != 0;
                   3322: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   3323: 
                   3324: /* Set up the first character to match, if available. The first_char value is
                   3325: never set for an anchored regular expression, but the anchoring may be forced
                   3326: at run time, so we have to test for anchoring. The first char may be unset for
                   3327: an unanchored pattern, of course. If there's no first char and the pattern was
                   3328: studied, there may be a bitmap of possible first characters. */
                   3329: 
                   3330: if (!anchored)
                   3331:   {
                   3332:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   3333:     {
1.1.1.2   misho    3334:     has_first_char = TRUE;
                   3335:     first_char = first_char2 = (pcre_uchar)(re->first_char);
                   3336:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
                   3337:       {
                   3338:       first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
                   3339: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   3340:       if (utf && first_char > 127)
                   3341:         first_char2 = UCD_OTHERCASE(first_char);
                   3342: #endif
                   3343:       }
1.1       misho    3344:     }
                   3345:   else
                   3346:     {
                   3347:     if (!startline && study != NULL &&
                   3348:          (study->flags & PCRE_STUDY_MAPPED) != 0)
                   3349:       start_bits = study->start_bits;
                   3350:     }
                   3351:   }
                   3352: 
                   3353: /* For anchored or unanchored matches, there may be a "last known required
                   3354: character" set. */
                   3355: 
                   3356: if ((re->flags & PCRE_REQCHSET) != 0)
                   3357:   {
1.1.1.2   misho    3358:   has_req_char = TRUE;
                   3359:   req_char = req_char2 = (pcre_uchar)(re->req_char);
                   3360:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
                   3361:     {
                   3362:     req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
                   3363: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   3364:     if (utf && req_char > 127)
                   3365:       req_char2 = UCD_OTHERCASE(req_char);
                   3366: #endif
                   3367:     }
1.1       misho    3368:   }
                   3369: 
                   3370: /* Call the main matching function, looping for a non-anchored regex after a
                   3371: failed match. If not restarting, perform certain optimizations at the start of
                   3372: a match. */
                   3373: 
                   3374: for (;;)
                   3375:   {
                   3376:   int rc;
                   3377: 
                   3378:   if ((options & PCRE_DFA_RESTART) == 0)
                   3379:     {
1.1.1.2   misho    3380:     const pcre_uchar *save_end_subject = end_subject;
1.1       misho    3381: 
                   3382:     /* If firstline is TRUE, the start of the match is constrained to the first
                   3383:     line of a multiline string. Implement this by temporarily adjusting
                   3384:     end_subject so that we stop scanning at a newline. If the match fails at
                   3385:     the newline, later code breaks this loop. */
                   3386: 
                   3387:     if (firstline)
                   3388:       {
1.1.1.2   misho    3389:       PCRE_PUCHAR t = current_subject;
                   3390: #ifdef SUPPORT_UTF
                   3391:       if (utf)
1.1       misho    3392:         {
                   3393:         while (t < md->end_subject && !IS_NEWLINE(t))
                   3394:           {
                   3395:           t++;
1.1.1.2   misho    3396:           ACROSSCHAR(t < end_subject, *t, t++);
1.1       misho    3397:           }
                   3398:         }
                   3399:       else
                   3400: #endif
                   3401:       while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   3402:       end_subject = t;
                   3403:       }
                   3404: 
                   3405:     /* There are some optimizations that avoid running the match if a known
                   3406:     starting point is not found. However, there is an option that disables
                   3407:     these, for testing and for ensuring that all callouts do actually occur.
                   3408:     The option can be set in the regex by (*NO_START_OPT) or passed in
                   3409:     match-time options. */
                   3410: 
                   3411:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   3412:       {
1.1.1.2   misho    3413:       /* Advance to a known first char. */
1.1       misho    3414: 
1.1.1.2   misho    3415:       if (has_first_char)
1.1       misho    3416:         {
1.1.1.2   misho    3417:         if (first_char != first_char2)
1.1       misho    3418:           while (current_subject < end_subject &&
1.1.1.2   misho    3419:               *current_subject != first_char && *current_subject != first_char2)
1.1       misho    3420:             current_subject++;
                   3421:         else
                   3422:           while (current_subject < end_subject &&
1.1.1.2   misho    3423:                  *current_subject != first_char)
1.1       misho    3424:             current_subject++;
                   3425:         }
                   3426: 
                   3427:       /* Or to just after a linebreak for a multiline match if possible */
                   3428: 
                   3429:       else if (startline)
                   3430:         {
                   3431:         if (current_subject > md->start_subject + start_offset)
                   3432:           {
1.1.1.2   misho    3433: #ifdef SUPPORT_UTF
                   3434:           if (utf)
1.1       misho    3435:             {
                   3436:             while (current_subject < end_subject &&
                   3437:                    !WAS_NEWLINE(current_subject))
                   3438:               {
                   3439:               current_subject++;
1.1.1.2   misho    3440:               ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3441:                 current_subject++);
1.1       misho    3442:               }
                   3443:             }
                   3444:           else
                   3445: #endif
                   3446:           while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
                   3447:             current_subject++;
                   3448: 
                   3449:           /* If we have just passed a CR and the newline option is ANY or
                   3450:           ANYCRLF, and we are now at a LF, advance the match position by one
                   3451:           more character. */
                   3452: 
                   3453:           if (current_subject[-1] == CHAR_CR &&
                   3454:                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   3455:                current_subject < end_subject &&
                   3456:                *current_subject == CHAR_NL)
                   3457:             current_subject++;
                   3458:           }
                   3459:         }
                   3460: 
                   3461:       /* Or to a non-unique first char after study */
                   3462: 
                   3463:       else if (start_bits != NULL)
                   3464:         {
                   3465:         while (current_subject < end_subject)
                   3466:           {
                   3467:           register unsigned int c = *current_subject;
1.1.1.2   misho    3468: #ifndef COMPILE_PCRE8
                   3469:           if (c > 255) c = 255;
                   3470: #endif
1.1       misho    3471:           if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   3472:             {
                   3473:             current_subject++;
1.1.1.2   misho    3474: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
                   3475:             /* In non 8-bit mode, the iteration will stop for
                   3476:             characters > 255 at the beginning or not stop at all. */
                   3477:             if (utf)
                   3478:               ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3479:                 current_subject++);
1.1       misho    3480: #endif
                   3481:             }
                   3482:           else break;
                   3483:           }
                   3484:         }
                   3485:       }
                   3486: 
                   3487:     /* Restore fudged end_subject */
                   3488: 
                   3489:     end_subject = save_end_subject;
                   3490: 
                   3491:     /* The following two optimizations are disabled for partial matching or if
                   3492:     disabling is explicitly requested (and of course, by the test above, this
                   3493:     code is not obeyed when restarting after a partial match). */
                   3494: 
                   3495:     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
                   3496:         (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
                   3497:       {
                   3498:       /* If the pattern was studied, a minimum subject length may be set. This
                   3499:       is a lower bound; possibly no string of that length actually matches the
                   3500:       pattern. Although the value is, strictly, in characters, we treat it as
                   3501:       bytes to avoid spending too much time in this optimization. */
                   3502: 
                   3503:       if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   3504:           (pcre_uint32)(end_subject - current_subject) < study->minlength)
                   3505:         return PCRE_ERROR_NOMATCH;
                   3506: 
1.1.1.2   misho    3507:       /* If req_char is set, we know that that character must appear in the
                   3508:       subject for the match to succeed. If the first character is set, req_char
1.1       misho    3509:       must be later in the subject; otherwise the test starts at the match
                   3510:       point. This optimization can save a huge amount of work in patterns with
                   3511:       nested unlimited repeats that aren't going to match. Writing separate
                   3512:       code for cased/caseless versions makes it go faster, as does using an
                   3513:       autoincrement and backing off on a match.
                   3514: 
                   3515:       HOWEVER: when the subject string is very, very long, searching to its end
                   3516:       can take a long time, and give bad performance on quite ordinary
                   3517:       patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
                   3518:       string... so we don't do this when the string is sufficiently long. */
                   3519: 
1.1.1.2   misho    3520:       if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
1.1       misho    3521:         {
1.1.1.2   misho    3522:         register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
1.1       misho    3523: 
                   3524:         /* We don't need to repeat the search if we haven't yet reached the
                   3525:         place we found it at last time. */
                   3526: 
1.1.1.2   misho    3527:         if (p > req_char_ptr)
1.1       misho    3528:           {
1.1.1.2   misho    3529:           if (req_char != req_char2)
1.1       misho    3530:             {
                   3531:             while (p < end_subject)
                   3532:               {
                   3533:               register int pp = *p++;
1.1.1.2   misho    3534:               if (pp == req_char || pp == req_char2) { p--; break; }
1.1       misho    3535:               }
                   3536:             }
                   3537:           else
                   3538:             {
                   3539:             while (p < end_subject)
                   3540:               {
1.1.1.2   misho    3541:               if (*p++ == req_char) { p--; break; }
1.1       misho    3542:               }
                   3543:             }
                   3544: 
                   3545:           /* If we can't find the required character, break the matching loop,
                   3546:           which will cause a return of PCRE_ERROR_NOMATCH. */
                   3547: 
                   3548:           if (p >= end_subject) break;
                   3549: 
                   3550:           /* If we have found the required character, save the point where we
                   3551:           found it, so that we don't search again next time round the loop if
                   3552:           the start hasn't passed this character yet. */
                   3553: 
1.1.1.2   misho    3554:           req_char_ptr = p;
1.1       misho    3555:           }
                   3556:         }
                   3557:       }
                   3558:     }   /* End of optimizations that are done when not restarting */
                   3559: 
                   3560:   /* OK, now we can do the business */
                   3561: 
                   3562:   md->start_used_ptr = current_subject;
                   3563:   md->recursive = NULL;
                   3564: 
                   3565:   rc = internal_dfa_exec(
                   3566:     md,                                /* fixed match data */
                   3567:     md->start_code,                    /* this subexpression's code */
                   3568:     current_subject,                   /* where we currently are */
                   3569:     start_offset,                      /* start offset in subject */
                   3570:     offsets,                           /* offset vector */
                   3571:     offsetcount,                       /* size of same */
                   3572:     workspace,                         /* workspace vector */
                   3573:     wscount,                           /* size of same */
                   3574:     0);                                /* function recurse level */
                   3575: 
                   3576:   /* Anything other than "no match" means we are done, always; otherwise, carry
                   3577:   on only if not anchored. */
                   3578: 
                   3579:   if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
                   3580: 
                   3581:   /* Advance to the next subject character unless we are at the end of a line
                   3582:   and firstline is set. */
                   3583: 
                   3584:   if (firstline && IS_NEWLINE(current_subject)) break;
                   3585:   current_subject++;
1.1.1.2   misho    3586: #ifdef SUPPORT_UTF
                   3587:   if (utf)
1.1       misho    3588:     {
1.1.1.2   misho    3589:     ACROSSCHAR(current_subject < end_subject, *current_subject,
                   3590:       current_subject++);
1.1       misho    3591:     }
1.1.1.2   misho    3592: #endif
1.1       misho    3593:   if (current_subject > end_subject) break;
                   3594: 
                   3595:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   3596:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   3597:   or ANY or ANYCRLF, advance the match position by one more character. */
                   3598: 
                   3599:   if (current_subject[-1] == CHAR_CR &&
                   3600:       current_subject < end_subject &&
                   3601:       *current_subject == CHAR_NL &&
                   3602:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   3603:         (md->nltype == NLTYPE_ANY ||
                   3604:          md->nltype == NLTYPE_ANYCRLF ||
                   3605:          md->nllen == 2))
                   3606:     current_subject++;
                   3607: 
                   3608:   }   /* "Bumpalong" loop */
                   3609: 
                   3610: return PCRE_ERROR_NOMATCH;
                   3611: }
                   3612: 
                   3613: /* End of pcre_dfa_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>