Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_exec.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2010 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #include "config.h"
                     46: 
                     47: #define NLBLOCK md             /* Block containing newline information */
                     48: #define PSSTART start_subject  /* Field containing processed string start */
                     49: #define PSEND   end_subject    /* Field containing processed string end */
                     50: 
                     51: #include "pcre_internal.h"
                     52: 
                     53: /* Undefine some potentially clashing cpp symbols */
                     54: 
                     55: #undef min
                     56: #undef max
                     57: 
                     58: /* Flag bits for the match() function */
                     59: 
                     60: #define match_condassert     0x01  /* Called to check a condition assertion */
                     61: #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
                     62: 
                     63: /* Non-error returns from the match() function. Error returns are externally
                     64: defined PCRE_ERROR_xxx codes, which are all negative. */
                     65: 
                     66: #define MATCH_MATCH        1       /* Matched; together with MATCH_NOMATCH the only results >= 0 */
                     67: #define MATCH_NOMATCH      0       /* Failed to match */
                     68: 
                     69: /* Special internal returns from the match() function. Make them sufficiently
                     70: negative to avoid the external error codes. */
                     71: 
                     72: #define MATCH_ACCEPT       (-999)  /* NOTE(review): these six are handled inside match(), outside this excerpt */
                     73: #define MATCH_COMMIT       (-998)
                     74: #define MATCH_PRUNE        (-997)
                     75: #define MATCH_SKIP         (-996)
                     76: #define MATCH_SKIP_ARG     (-995)
                     77: #define MATCH_THEN         (-994)  /* Values are consecutive, -999 up to -994 */
                     78: 
                     79: /* This is a convenience macro for code that occurs many times. */
                     80: 
                     81: #define MRRETURN(ra) \
                     82:   { \
                     83:   md->mark = markptr;  /* copy the current MARK pointer into the match data */ \
                     84:   RRETURN(ra);         /* then return ra through whichever RRETURN this build defines */ \
                     85:   }  /* Expands to a bare brace block, so "MRRETURN(x);" leaves an extra empty statement */
                     86: 
                     87: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     88: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     89: because the offset vector is always a multiple of 3 long. */
                     90: 
                     91: #define REC_STACK_SAVE_MAX 30  /* 30 = 10 * 3; also the size of Xstacksave[] in heapframe */
                     92: 
                     93: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     94: 
                     95: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* Minimum count; NOTE(review): entries presumably ordered *, *?, +, +?, ?, ?? - confirm at the use sites */
                     96: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* Maximum count, indexed the same way; 0 stands for "no upper limit" */
                     97: 
                     98: 
                     99: 
                    100: #ifdef PCRE_DEBUG
                    101: /*************************************************
                    102: *        Debugging function to print chars       *
                    103: *************************************************/
                    104: 
                    105: /* Print a sequence of chars in printable format, stopping at the end of the
                    106: subject if requested.
                    107: 
                    108: Arguments:
                    109:   p           points to characters
                    110:   length      number to print
                    111:   is_subject  TRUE if printing from within md->start_subject
                    112:   md          pointer to matching data block, if is_subject is TRUE
                    113: 
                    114: Returns:     nothing
                    115: */
                    116: 
                    117: static void  /* Debug helper: only compiled when PCRE_DEBUG is defined */
                    118: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
                    119: {
                    120: unsigned int c;  /* The byte currently being printed */
                    121: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* Clamp at the subject end; md is read only when is_subject is set */
                    122: while (length-- > 0)
                    123:   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);  /* Printable bytes as-is, anything else as a \xhh escape */
                    124: }
                    125: #endif
                    126: 
                    127: 
                    128: 
                    129: /*************************************************
                    130: *          Match a back-reference                *
                    131: *************************************************/
                    132: 
                    133: /* If a back reference hasn't been set, the length that is passed is greater
                    134: than the number of characters left in the string, so the match fails.
                    135: 
                    136: Arguments:
                    137:   offset      index into the offset vector
                    138:   eptr        points into the subject
                    139:   length      length to be matched
                    140:   md          points to match data block
                    141:   ims         the ims flags
                    142: 
                    143: Returns:      TRUE if matched
                    144: */
                    145: 
                    146: static BOOL
                    147: match_ref(int offset, register USPTR eptr, int length, match_data *md,
                    148:   unsigned long int ims)
                    149: {
                    150: USPTR p = md->start_subject + md->offset_vector[offset];  /* Where the referenced text begins in the subject */
                    151: 
                    152: #ifdef PCRE_DEBUG
                    153: if (eptr >= md->end_subject)
                    154:   printf("matching subject <null>");
                    155: else
                    156:   {
                    157:   printf("matching subject ");
                    158:   pchars(eptr, length, TRUE, md);  /* TRUE: output is clamped at md->end_subject */
                    159:   }
                    160: printf(" against backref ");
                    161: pchars(p, length, FALSE, md);  /* FALSE: no clamping, exactly length bytes are printed */
                    162: printf("\n");
                    163: #endif
                    164: 
                    165: /* Always fail if not enough characters left */
                    166: 
                    167: if (length > md->end_subject - eptr) return FALSE;  /* Per the header comment, this is also how an unset reference fails */
                    168: 
                    169: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    170: properly if Unicode properties are supported. Otherwise, we can check only
                    171: ASCII characters. */
                    172: 
                    173: if ((ims & PCRE_CASELESS) != 0)
                    174:   {
                    175: #ifdef SUPPORT_UTF8
                    176: #ifdef SUPPORT_UCP
                    177:   if (md->utf8)
                    178:     {
                    179:     USPTR endptr = eptr + length;  /* The loop is bounded by bytes consumed on the subject side */
                    180:     while (eptr < endptr)
                    181:       {
                    182:       int c, d;  /* c: next subject character, d: next reference character */
                    183:       GETCHARINC(c, eptr);
                    184:       GETCHARINC(d, p);
                    185:       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;  /* Accept an exact match or the other-case form of d */
                    186:       }
                    187:     }
                    188:   else  /* When compiled in, this else binds to the while loop below */
                    189: #endif  /* SUPPORT_UCP */
                    190: #endif  /* SUPPORT_UTF8 */
                    191: 
                    192:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    193:   is no UCP support. */
                    194: 
                    195:   while (length-- > 0)
                    196:     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }  /* Compare bytes after mapping both through md->lcc (presumably a case-folding table - confirm) */
                    197:   }
                    198: 
                    199: /* In the caseful case, we can just compare the bytes, whether or not we
                    200: are in UTF-8 mode. */
                    201: 
                    202: else
                    203:   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
                    204: 
                    205: return TRUE;  /* Every compared position agreed (trivially so when length <= 0) */
                    206: }
                    207: 
                    208: 
                    209: 
                    210: /***************************************************************************
                    211: ****************************************************************************
                    212:                    RECURSION IN THE match() FUNCTION
                    213: 
                    214: The match() function is highly recursive, though not every recursive call
                    215: increases the recursive depth. Nevertheless, some regular expressions can cause
                    216: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    217: itself recursively. This uses the stack for saving everything that has to be
                    218: saved for a recursive call. On Unix, the stack can be large, and this works
                    219: fine.
                    220: 
                    221: It turns out that on some non-Unix-like systems there are problems with
                    222: programs that use a lot of stack. (This despite the fact that every last chip
                    223: has oodles of memory these days, and techniques for extending the stack have
                    224: been known for decades.) So....
                    225: 
                    226: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    227: calls by keeping local variables that need to be preserved in blocks of memory
                    228: obtained from malloc() instead of on the stack. Macros are used to
                    229: achieve this so that the actual code doesn't look very different to what it
                    230: always used to.
                    231: 
                    232: The original heap-recursive code used longjmp(). However, it seems that this
                    233: can be very slow on some operating systems. Following a suggestion from Stan
                    234: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    235: provide a unique number for each call to RMATCH. There is no way of generating
                    236: a sequence of numbers at compile time in C. I have given them names, to make
                    237: them stand out more clearly.
                    238: 
                    239: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    240: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    241: tests. Furthermore, not using longjmp() means that local dynamic variables
                    242: don't have indeterminate values; this has meant that the frame size can be
                    243: reduced because the result can be "passed back" by straight setting of the
                    244: variable instead of being passed in the frame.
                    245: ****************************************************************************
                    246: ***************************************************************************/
                    247: 
                    248: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    249: below must be updated in sync.  */
                    250: 
                    251: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  /* Each value is passed as the rw argument of one RMATCH call */
                    252:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    253:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    254:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    255:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    256:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
                    257:        RM61,  RM62 };  /* Values start at 1 and run consecutively up to 62 */
                    258: 
                    259: /* These versions of the macros use the stack, as normal. There are debugging
                    260: versions and production versions. Note that the "rw" argument of RMATCH isn't
                    261: actually used in this definition. */
                    262: 
                    263: #ifndef NO_RECURSE
                    264: #define REGISTER register  /* match() declares eptr and ecode with REGISTER, so they get the register hint here */
                    265: 
                    266: #ifdef PCRE_DEBUG
                    267: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    268:   { \
                    269:   printf("match() called in line %d\n", __LINE__); \
                    270:   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1);  /* genuine recursive call; mstart, markptr and rdepth come from the enclosing scope */ \
                    271:   printf("to line %d\n", __LINE__); \
                    272:   }
                    273: #define RRETURN(ra) \
                    274:   { \
                    275:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    276:   return ra; \
                    277:   }
                    278: #else  /* Production versions: same call and return, without the tracing output */
                    279: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    280:   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
                    281: #define RRETURN(ra) return ra
                    282: #endif  /* PCRE_DEBUG */
                    283: 
                    284: #else
                    285: 
                    286: 
                    287: /* These versions of the macros manage a private stack on the heap. Note that
                    288: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    289: argument of match(), which never changes. */
                    290: 
                    291: #define REGISTER  /* Expands to nothing in the NO_RECURSE build */
                    292: 
                    293: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
                    294:   {\
                    295:   heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); /* frame for the "callee" */\
                    296:   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY); /* RRETURN also frees the current frame */\
                    297:   frame->Xwhere = rw; /* stored in the CALLER's frame: which call site to resume at */ \
                    298:   newframe->Xeptr = ra;\
                    299:   newframe->Xecode = rb;\
                    300:   newframe->Xmstart = mstart;\
                    301:   newframe->Xmarkptr = markptr;\
                    302:   newframe->Xoffset_top = rc;\
                    303:   newframe->Xims = re; /* rd (the md argument) is deliberately not stored */\
                    304:   newframe->Xeptrb = rf;\
                    305:   newframe->Xflags = rg;\
                    306:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    307:   newframe->Xprevframe = frame; /* link back to the caller */\
                    308:   frame = newframe; /* the new frame becomes current */\
                    309:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    310:   goto HEAP_RECURSE; /* re-enter the body of match() on the new frame */\
                    311:   L_##rw: /* per-call-site label; presumably reached from the HEAP_RETURN dispatch, which is not shown here */\
                    312:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    313:   }
                    314: 
                    315: #define RRETURN(ra)\
                    316:   {\
                    317:   heapframe *oldframe = frame;\
                    318:   frame = oldframe->Xprevframe; /* pop: the caller's frame becomes current */\
                    319:   (pcre_stack_free)(oldframe);\
                    320:   if (frame != NULL)\
                    321:     {\
                    322:     rrc = ra; /* hand the result to the "caller" through rrc */\
                    323:     goto HEAP_RETURN;\
                    324:     }\
                    325:   return ra; /* no caller frame left: this was the top level, so really return */\
                    326:   }
                    327: 
                    328: 
                    329: /* Structure for remembering the local variables in a private frame */
                    330: 
                    331: typedef struct heapframe {
                    332:   struct heapframe *Xprevframe;  /* Frame of the "caller"; NULL marks the top level */
                    333: 
                    334:   /* Function arguments that may change */
                    335: 
                    336:   USPTR Xeptr;
                    337:   const uschar *Xecode;
                    338:   USPTR Xmstart;
                    339:   USPTR Xmarkptr;
                    340:   int Xoffset_top;
                    341:   long int Xims;  /* NOTE(review): signed here, whereas match() declares ims as unsigned long int */
                    342:   eptrblock *Xeptrb;
                    343:   int Xflags;
                    344:   unsigned int Xrdepth;  /* RMATCH sets this to the caller's Xrdepth + 1 */
                    345: 
                    346:   /* Function local variables */
                    347: 
                    348:   USPTR Xcallpat;
                    349: #ifdef SUPPORT_UTF8
                    350:   USPTR Xcharptr;
                    351: #endif
                    352:   USPTR Xdata;
                    353:   USPTR Xnext;
                    354:   USPTR Xpp;
                    355:   USPTR Xprev;
                    356:   USPTR Xsaved_eptr;
                    357: 
                    358:   recursion_info Xnew_recursive;
                    359: 
                    360:   BOOL Xcur_is_word;
                    361:   BOOL Xcondition;
                    362:   BOOL Xprev_is_word;
                    363: 
                    364:   unsigned long int Xoriginal_ims;
                    365: 
                    366: #ifdef SUPPORT_UCP
                    367:   int Xprop_type;
                    368:   int Xprop_value;
                    369:   int Xprop_fail_result;
                    370:   int Xprop_category;
                    371:   int Xprop_chartype;
                    372:   int Xprop_script;
                    373:   int Xoclength;
                    374:   uschar Xocchars[8];
                    375: #endif
                    376: 
                    377:   int Xcodelink;
                    378:   int Xctype;
                    379:   unsigned int Xfc;  /* Backs the name fc; in the recursive build fc is simply an alias of c */
                    380:   int Xfi;           /* Backs the name fi; in the recursive build fi is simply an alias of i */
                    381:   int Xlength;
                    382:   int Xmax;
                    383:   int Xmin;
                    384:   int Xnumber;
                    385:   int Xoffset;
                    386:   int Xop;
                    387:   int Xsave_capture_last;
                    388:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    389:   int Xstacksave[REC_STACK_SAVE_MAX];  /* Fixed-size buffer of REC_STACK_SAVE_MAX ints */
                    390: 
                    391:   eptrblock Xnewptrb;
                    392: 
                    393:   /* Where to jump back to */
                    394: 
                    395:   int Xwhere;  /* The rw value that RMATCH stores before switching to a new frame */
                    396: 
                    397: } heapframe;
                    398: 
                    399: #endif
                    400: 
                    401: 
                    402: /***************************************************************************
                    403: ***************************************************************************/
                    404: 
                    405: 
                    406: 
                    407: /*************************************************
                    408: *         Match from current position            *
                    409: *************************************************/
                    410: 
                    411: /* This function is called recursively in many circumstances. Whenever it
                    412: returns a negative (error) response, the outer incarnation must also return the
                    413: same response. */
                    414: 
                    415: /* These macros pack up tests that are used for partial matching, and which
                    416: appear several times in the code. We set the "hit end" flag if the pointer is
                    417: at the end of the subject and also past the start of the subject (i.e.
                    418: something has been matched). For hard partial matching, we then return
                    419: immediately. The second one is used when we already know we are past the end of
                    420: the subject. */
                    421: 
                    422: #define CHECK_PARTIAL()\
                    423:   if (md->partial != 0 && eptr >= md->end_subject && /* partial matching requested and at/after the subject end */ \
                    424:       eptr > md->start_used_ptr) /* and strictly past md->start_used_ptr */ \
                    425:     { \
                    426:     md->hitend = TRUE; /* record that the end of the subject was hit */ \
                    427:     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); /* values above 1 return immediately (hard partial) */ \
                    428:     }  /* Expands to a bare if statement: an else written right after the macro would attach to it */
                    429: 
                    430: #define SCHECK_PARTIAL()\
                    431:   if (md->partial != 0 && eptr > md->start_used_ptr) /* same test, minus the end-of-subject comparison */ \
                    432:     { \
                    433:     md->hitend = TRUE; \
                    434:     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
                    435:     }  /* For call sites that already know eptr is past the end of the subject */
                    436: 
                    437: 
                    438: /* Performance note: It might be tempting to extract commonly used fields from
                    439: the md structure (e.g. utf8, end_subject) into individual variables to improve
                    440: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    441: made performance worse.
                    442: 
                    443: Arguments:
                    444:    eptr        pointer to current character in subject
                    445:    ecode       pointer to current position in compiled code
                    446:    mstart      pointer to the current match start position (can be modified
                    447:                  by encountering \K)
                    448:    markptr     pointer to the most recent MARK name, or NULL
                    449:    offset_top  current top pointer
                    450:    md          pointer to "static" info for the match
                    451:    ims         current /i, /m, and /s options
                    452:    eptrb       pointer to chain of blocks containing eptr at start of
                    453:                  brackets - for testing for empty matches
                    454:    flags       can contain
                    455:                  match_condassert - this is an assertion condition
                    456:                  match_cbegroup - this is the start of an unlimited repeat
                    457:                    group that can match an empty string
                    458:    rdepth      the recursion depth
                    459: 
                    460: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    461:                MATCH_NOMATCH if failed to match  )
                    462:                a negative MATCH_xxx value for PRUNE, SKIP, etc
                    463:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    464:                  (e.g. stopped by repeated call or recursion limit)
                    465: */
                    466: 
                    467: static int
                    468: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
                    469:   const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
                    470:   eptrblock *eptrb, int flags, unsigned int rdepth)
                    471: {
                    472: /* These variables do not need to be preserved over recursion in this function,
                    473: so they can be ordinary variables in all cases. Mark some of them with
                    474: "register" because they are used a lot in loops. */
                    475: 
                    476: register int  rrc;         /* Returns from recursive calls */
                    477: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    478: register unsigned int c;   /* Character values not kept over RMATCH() calls */
                    479: register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
                    480: 
                    481: BOOL minimize, possessive; /* Quantifier options */
                    482: int condcode;
                    483: 
                    484: /* When recursion is not being used, all "local" variables that have to be
                    485: preserved over calls to RMATCH() are part of a "frame" which is obtained from
                    486: heap storage. Set up the top-level frame here; others are obtained from the
                    487: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
                    488: 
                    489: #ifdef NO_RECURSE
                    490: heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
                    491: if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                    492: frame->Xprevframe = NULL;            /* Marks the top level */
                    493: 
                    494: /* Copy in the original argument variables */
                    495: 
                    496: frame->Xeptr = eptr;
                    497: frame->Xecode = ecode;
                    498: frame->Xmstart = mstart;
                    499: frame->Xmarkptr = markptr;
                    500: frame->Xoffset_top = offset_top;
                    501: frame->Xims = ims;
                    502: frame->Xeptrb = eptrb;
                    503: frame->Xflags = flags;
                    504: frame->Xrdepth = rdepth;
                    505: 
                    506: /* This is where control jumps back to to effect "recursion" */
                    507: 
                    508: HEAP_RECURSE:
                    509: 
                    510: /* Macros make the argument variables come from the current frame */
                    511: 
                    512: #define eptr               frame->Xeptr
                    513: #define ecode              frame->Xecode
                    514: #define mstart             frame->Xmstart
                    515: #define markptr            frame->Xmarkptr
                    516: #define offset_top         frame->Xoffset_top
                    517: #define ims                frame->Xims
                    518: #define eptrb              frame->Xeptrb
                    519: #define flags              frame->Xflags
                    520: #define rdepth             frame->Xrdepth
                    521: 
                    522: /* Ditto for the local variables */
                    523: 
                    524: #ifdef SUPPORT_UTF8
                    525: #define charptr            frame->Xcharptr
                    526: #endif
                    527: #define callpat            frame->Xcallpat
                    528: #define codelink           frame->Xcodelink
                    529: #define data               frame->Xdata
                    530: #define next               frame->Xnext
                    531: #define pp                 frame->Xpp
                    532: #define prev               frame->Xprev
                    533: #define saved_eptr         frame->Xsaved_eptr
                    534: 
                    535: #define new_recursive      frame->Xnew_recursive
                    536: 
                    537: #define cur_is_word        frame->Xcur_is_word
                    538: #define condition          frame->Xcondition
                    539: #define prev_is_word       frame->Xprev_is_word
                    540: 
                    541: #define original_ims       frame->Xoriginal_ims
                    542: 
                    543: #ifdef SUPPORT_UCP
                    544: #define prop_type          frame->Xprop_type
                    545: #define prop_value         frame->Xprop_value
                    546: #define prop_fail_result   frame->Xprop_fail_result
                    547: #define prop_category      frame->Xprop_category
                    548: #define prop_chartype      frame->Xprop_chartype
                    549: #define prop_script        frame->Xprop_script
                    550: #define oclength           frame->Xoclength
                    551: #define occhars            frame->Xocchars
                    552: #endif
                    553: 
                    554: #define ctype              frame->Xctype
                    555: #define fc                 frame->Xfc
                    556: #define fi                 frame->Xfi
                    557: #define length             frame->Xlength
                    558: #define max                frame->Xmax
                    559: #define min                frame->Xmin
                    560: #define number             frame->Xnumber
                    561: #define offset             frame->Xoffset
                    562: #define op                 frame->Xop
                    563: #define save_capture_last  frame->Xsave_capture_last
                    564: #define save_offset1       frame->Xsave_offset1
                    565: #define save_offset2       frame->Xsave_offset2
                    566: #define save_offset3       frame->Xsave_offset3
                    567: #define stacksave          frame->Xstacksave
                    568: 
                    569: #define newptrb            frame->Xnewptrb
                    570: 
                    571: /* When recursion is being used, local variables are allocated on the stack and
                    572: get preserved during recursion in the normal way. In this environment, fi and
                    573: i, and fc and c, can be the same variables. */
                    574: 
                    575: #else         /* NO_RECURSE not defined */
                    576: #define fi i
                    577: #define fc c
                    578: 
                    579: 
                    580: #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
                    581: const uschar *charptr;             /* in small blocks of the code. My normal */
                    582: #endif                             /* style of coding would have declared    */
                    583: const uschar *callpat;             /* them within each of those blocks.      */
                    584: const uschar *data;                /* However, in order to accommodate the   */
                    585: const uschar *next;                /* version of this code that uses an      */
                    586: USPTR         pp;                  /* external "stack" implemented on the    */
                    587: const uschar *prev;                /* heap, it is easier to declare them all */
                    588: USPTR         saved_eptr;          /* here, so the declarations can be cut   */
                    589:                                    /* out in a block. The only declarations  */
                    590: recursion_info new_recursive;      /* within blocks below are for variables  */
                    591:                                    /* that do not have to be preserved over  */
                    592: BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
                    593: BOOL condition;
                    594: BOOL prev_is_word;
                    595: 
                    596: unsigned long int original_ims;
                    597: 
                    598: #ifdef SUPPORT_UCP
                    599: int prop_type;
                    600: int prop_value;
                    601: int prop_fail_result;
                    602: int prop_category;
                    603: int prop_chartype;
                    604: int prop_script;
                    605: int oclength;
                    606: uschar occhars[8];
                    607: #endif
                    608: 
                    609: int codelink;
                    610: int ctype;
                    611: int length;
                    612: int max;
                    613: int min;
                    614: int number;
                    615: int offset;
                    616: int op;
                    617: int save_capture_last;
                    618: int save_offset1, save_offset2, save_offset3;
                    619: int stacksave[REC_STACK_SAVE_MAX];
                    620: 
                    621: eptrblock newptrb;
                    622: #endif     /* NO_RECURSE */
                    623: 
                    624: /* These statements are here to stop the compiler complaining about uninitialized
                    625: variables. */
                    626: 
                    627: #ifdef SUPPORT_UCP
                    628: prop_value = 0;
                    629: prop_fail_result = 0;
                    630: #endif
                    631: 
                    632: 
                    633: /* This label is used for tail recursion, which is used in a few cases even
                    634: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    635: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    636: original patch. */
                    637: 
                    638: TAIL_RECURSE:
                    639: 
                    640: /* OK, now we can get on with the real code of the function. Recursive calls
                    641: are specified by the macro RMATCH and RRETURN is used to return. When
                    642: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    643: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
                    644: defined). However, RMATCH isn't like a function call because it's quite a
                    645: complicated macro. It has to be used in one particular way. This shouldn't,
                    646: however, impact performance when true recursion is being used. */
                    647: 
                    648: #ifdef SUPPORT_UTF8
                    649: utf8 = md->utf8;       /* Local copy of the flag */
                    650: #else
                    651: utf8 = FALSE;
                    652: #endif
                    653: 
                    654: /* First check that we haven't called match() too many times, or that we
                    655: haven't exceeded the recursive call limit. */
                    656: 
                    657: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    658: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    659: 
                    660: original_ims = ims;    /* Save for resetting on ')' */
                    661: 
                    662: /* At the start of a group with an unlimited repeat that may match an empty
                    663: string, the match_cbegroup flag is set. When this is the case, add the current
                    664: subject pointer to the chain of such remembered pointers, to be checked when we
                    665: hit the closing ket, in order to break infinite loops that match no characters.
                    666: When match() is called in other circumstances, don't add to the chain. The
                    667: match_cbegroup flag must NOT be used with tail recursion, because the memory
                    668: block that is used is on the stack, so a new one may be required for each
                    669: match(). */
                    670: 
                    671: if ((flags & match_cbegroup) != 0)
                    672:   {
                    673:   newptrb.epb_saved_eptr = eptr;
                    674:   newptrb.epb_prev = eptrb;
                    675:   eptrb = &newptrb;
                    676:   }
                    677: 
                    678: /* Now start processing the opcodes. */
                    679: 
                    680: for (;;)
                    681:   {
                    682:   minimize = possessive = FALSE;
                    683:   op = *ecode;
                    684: 
                    685:   switch(op)
                    686:     {
                    687:     case OP_MARK:
                    688:     markptr = ecode + 2;
                    689:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    690:       ims, eptrb, flags, RM55);
                    691: 
                    692:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    693:     argument, and we must check whether that argument matches this MARK's
                    694:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    695:     variable). If it does match, we reset that variable to the current subject
                    696:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    697:     unaltered. */
                    698: 
                    699:     if (rrc == MATCH_SKIP_ARG &&
                    700:         strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
                    701:       {
                    702:       md->start_match_ptr = eptr;
                    703:       RRETURN(MATCH_SKIP);
                    704:       }
                    705: 
                    706:     if (md->mark == NULL) md->mark = markptr;
                    707:     RRETURN(rrc);
                    708: 
                    709:     case OP_FAIL:
                    710:     MRRETURN(MATCH_NOMATCH);
                    711: 
                    712:     /* COMMIT overrides PRUNE, SKIP, and THEN */
                    713: 
                    714:     case OP_COMMIT:
                    715:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    716:       ims, eptrb, flags, RM52);
                    717:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
                    718:         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
                    719:         rrc != MATCH_THEN)
                    720:       RRETURN(rrc);
                    721:     MRRETURN(MATCH_COMMIT);
                    722: 
                    723:     /* PRUNE overrides THEN */
                    724: 
                    725:     case OP_PRUNE:
                    726:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    727:       ims, eptrb, flags, RM51);
                    728:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    729:     MRRETURN(MATCH_PRUNE);
                    730: 
                    731:     case OP_PRUNE_ARG:
                    732:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    733:       ims, eptrb, flags, RM56);
                    734:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    735:     md->mark = ecode + 2;
                    736:     RRETURN(MATCH_PRUNE);
                    737: 
                    738:     /* SKIP overrides PRUNE and THEN */
                    739: 
                    740:     case OP_SKIP:
                    741:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    742:       ims, eptrb, flags, RM53);
                    743:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    744:       RRETURN(rrc);
                    745:     md->start_match_ptr = eptr;   /* Pass back current position */
                    746:     MRRETURN(MATCH_SKIP);
                    747: 
                    748:     case OP_SKIP_ARG:
                    749:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    750:       ims, eptrb, flags, RM57);
                    751:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    752:       RRETURN(rrc);
                    753: 
                    754:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    755:     returning the special MATCH_SKIP_ARG return code. This will either be
                    756:     caught by a matching MARK, or get to the top, where it is treated the same
                    757:     as PRUNE. */
                    758: 
                    759:     md->start_match_ptr = ecode + 2;
                    760:     RRETURN(MATCH_SKIP_ARG);
                    761: 
                    762:     /* For THEN (and THEN_ARG) we pass back the address of the bracket or
                    763:     the alt that is at the start of the current branch. This makes it possible
                    764:     to skip back past alternatives that precede the THEN within the current
                    765:     branch. */
                    766: 
                    767:     case OP_THEN:
                    768:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    769:       ims, eptrb, flags, RM54);
                    770:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    771:     md->start_match_ptr = ecode - GET(ecode, 1);
                    772:     MRRETURN(MATCH_THEN);
                    773: 
                    774:     case OP_THEN_ARG:
                    775:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
                    776:       offset_top, md, ims, eptrb, flags, RM58);
                    777:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    778:     md->start_match_ptr = ecode - GET(ecode, 1);
                    779:     md->mark = ecode + LINK_SIZE + 2;
                    780:     RRETURN(MATCH_THEN);
                    781: 
                    782:     /* Handle a capturing bracket. If there is space in the offset vector, save
                    783:     the current subject position in the working slot at the top of the vector.
                    784:     We mustn't change the current values of the data slot, because they may be
                    785:     set from a previous iteration of this group, and be referred to by a
                    786:     reference inside the group.
                    787: 
                    788:     If the bracket fails to match, we need to restore this value and also the
                    789:     values of the final offsets, in case they were set by a previous iteration
                    790:     of the same bracket.
                    791: 
                    792:     If there isn't enough space in the offset vector, treat this as if it were
                    793:     a non-capturing bracket. Don't worry about setting the flag for the error
                    794:     case here; that is handled in the code for KET. */
                    795: 
                    796:     case OP_CBRA:
                    797:     case OP_SCBRA:
                    798:     number = GET2(ecode, 1+LINK_SIZE);
                    799:     offset = number << 1;
                    800: 
                    801: #ifdef PCRE_DEBUG
                    802:     printf("start bracket %d\n", number);
                    803:     printf("subject=");
                    804:     pchars(eptr, 16, TRUE, md);
                    805:     printf("\n");
                    806: #endif
                    807: 
                    808:     if (offset < md->offset_max)
                    809:       {
                    810:       save_offset1 = md->offset_vector[offset];
                    811:       save_offset2 = md->offset_vector[offset+1];
                    812:       save_offset3 = md->offset_vector[md->offset_end - number];
                    813:       save_capture_last = md->capture_last;
                    814: 
                    815:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    816:       md->offset_vector[md->offset_end - number] =
                    817:         (int)(eptr - md->start_subject);
                    818: 
                    819:       flags = (op == OP_SCBRA)? match_cbegroup : 0;
                    820:       do
                    821:         {
                    822:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    823:           ims, eptrb, flags, RM1);
                    824:         if (rrc != MATCH_NOMATCH &&
                    825:             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                    826:           RRETURN(rrc);
                    827:         md->capture_last = save_capture_last;
                    828:         ecode += GET(ecode, 1);
                    829:         }
                    830:       while (*ecode == OP_ALT);
                    831: 
                    832:       DPRINTF(("bracket %d failed\n", number));
                    833: 
                    834:       md->offset_vector[offset] = save_offset1;
                    835:       md->offset_vector[offset+1] = save_offset2;
                    836:       md->offset_vector[md->offset_end - number] = save_offset3;
                    837: 
                    838:       if (rrc != MATCH_THEN) md->mark = markptr;
                    839:       RRETURN(MATCH_NOMATCH);
                    840:       }
                    841: 
                    842:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    843:     as a non-capturing bracket. */
                    844: 
                    845:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    846:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    847: 
                    848:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    849: 
                    850:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    851:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    852: 
                    853:     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
                    854:     final alternative within the brackets, we would return the result of a
                    855:     recursive call to match() whatever happened. We can reduce stack usage by
                    856:     turning this into a tail recursion, except in the case when match_cbegroup
                    857:     is set. */
                    858: 
                    859:     case OP_BRA:
                    860:     case OP_SBRA:
                    861:     DPRINTF(("start non-capturing bracket\n"));
                    862:     flags = (op >= OP_SBRA)? match_cbegroup : 0;
                    863:     for (;;)
                    864:       {
                    865:       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
                    866:         {
                    867:         if (flags == 0)    /* Not a possibly empty group */
                    868:           {
                    869:           ecode += _pcre_OP_lengths[*ecode];
                    870:           DPRINTF(("bracket 0 tail recursion\n"));
                    871:           goto TAIL_RECURSE;
                    872:           }
                    873: 
                    874:         /* Possibly empty group; can't use tail recursion. */
                    875: 
                    876:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    877:           eptrb, flags, RM48);
                    878:         if (rrc == MATCH_NOMATCH) md->mark = markptr;
                    879:         RRETURN(rrc);
                    880:         }
                    881: 
                    882:       /* For non-final alternatives, continue the loop for a NOMATCH result;
                    883:       otherwise return. */
                    884: 
                    885:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    886:         eptrb, flags, RM2);
                    887:       if (rrc != MATCH_NOMATCH &&
                    888:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                    889:         RRETURN(rrc);
                    890:       ecode += GET(ecode, 1);
                    891:       }
                    892:     /* Control never reaches here. */
                    893: 
                    894:     /* Conditional group: compilation checked that there are no more than
                    895:     two branches. If the condition is false, skipping the first branch takes us
                    896:     past the end if there is only one branch, but that's OK because that is
                    897:     exactly what going to the ket would do. As there is only one branch to be
                    898:     obeyed, we can use tail recursion to avoid using another stack frame. */
                    899: 
                    900:     case OP_COND:
                    901:     case OP_SCOND:
                    902:     codelink= GET(ecode, 1);
                    903: 
                    904:     /* Because of the way auto-callout works during compile, a callout item is
                    905:     inserted between OP_COND and an assertion condition. */
                    906: 
                    907:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                    908:       {
                    909:       if (pcre_callout != NULL)
                    910:         {
                    911:         pcre_callout_block cb;
                    912:         cb.version          = 1;   /* Version 1 of the callout block */
                    913:         cb.callout_number   = ecode[LINK_SIZE+2];
                    914:         cb.offset_vector    = md->offset_vector;
                    915:         cb.subject          = (PCRE_SPTR)md->start_subject;
                    916:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                    917:         cb.start_match      = (int)(mstart - md->start_subject);
                    918:         cb.current_position = (int)(eptr - md->start_subject);
                    919:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                    920:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                    921:         cb.capture_top      = offset_top/2;
                    922:         cb.capture_last     = md->capture_last;
                    923:         cb.callout_data     = md->callout_data;
                    924:         if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
                    925:         if (rrc < 0) RRETURN(rrc);
                    926:         }
                    927:       ecode += _pcre_OP_lengths[OP_CALLOUT];
                    928:       }
                    929: 
                    930:     condcode = ecode[LINK_SIZE+1];
                    931: 
                    932:     /* Now see what the actual condition is */
                    933: 
                    934:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
                    935:       {
                    936:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                    937:         {
                    938:         condition = FALSE;
                    939:         ecode += GET(ecode, 1);
                    940:         }
                    941:       else
                    942:         {
                    943:         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
                    944:         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
                    945: 
                    946:         /* If the test is for recursion into a specific subpattern, and it is
                    947:         false, but the test was set up by name, scan the table to see if the
                    948:         name refers to any other numbers, and test them. The condition is true
                    949:         if any one is set. */
                    950: 
                    951:         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
                    952:           {
                    953:           uschar *slotA = md->name_table;
                    954:           for (i = 0; i < md->name_count; i++)
                    955:             {
                    956:             if (GET2(slotA, 0) == recno) break;
                    957:             slotA += md->name_entry_size;
                    958:             }
                    959: 
                    960:           /* Found a name for the number - there can be only one; duplicate
                    961:           names for different numbers are allowed, but not vice versa. First
                    962:           scan down for duplicates. */
                    963: 
                    964:           if (i < md->name_count)
                    965:             {
                    966:             uschar *slotB = slotA;
                    967:             while (slotB > md->name_table)
                    968:               {
                    969:               slotB -= md->name_entry_size;
                    970:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                    971:                 {
                    972:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                    973:                 if (condition) break;
                    974:                 }
                    975:               else break;
                    976:               }
                    977: 
                    978:             /* Scan up for duplicates */
                    979: 
                    980:             if (!condition)
                    981:               {
                    982:               slotB = slotA;
                    983:               for (i++; i < md->name_count; i++)
                    984:                 {
                    985:                 slotB += md->name_entry_size;
                    986:                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                    987:                   {
                    988:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                    989:                   if (condition) break;
                    990:                   }
                    991:                 else break;
                    992:                 }
                    993:               }
                    994:             }
                    995:           }
                    996: 
                    997:         /* Choose branch according to the condition */
                    998: 
                    999:         ecode += condition? 3 : GET(ecode, 1);
                   1000:         }
                   1001:       }
                   1002: 
                   1003:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
                   1004:       {
                   1005:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1006:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                   1007: 
                   1008:       /* If the numbered capture is unset, but the reference was by name,
                   1009:       scan the table to see if the name refers to any other numbers, and test
                   1010:       them. The condition is true if any one is set. This is tediously similar
                   1011:       to the code above, but not close enough to try to amalgamate. */
                   1012: 
                   1013:       if (!condition && condcode == OP_NCREF)
                   1014:         {
                   1015:         int refno = offset >> 1;
                   1016:         uschar *slotA = md->name_table;
                   1017: 
                   1018:         for (i = 0; i < md->name_count; i++)
                   1019:           {
                   1020:           if (GET2(slotA, 0) == refno) break;
                   1021:           slotA += md->name_entry_size;
                   1022:           }
                   1023: 
                   1024:         /* Found a name for the number - there can be only one; duplicate names
                   1025:         for different numbers are allowed, but not vice versa. First scan down
                   1026:         for duplicates. */
                   1027: 
                   1028:         if (i < md->name_count)
                   1029:           {
                   1030:           uschar *slotB = slotA;
                   1031:           while (slotB > md->name_table)
                   1032:             {
                   1033:             slotB -= md->name_entry_size;
                   1034:             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1035:               {
                   1036:               offset = GET2(slotB, 0) << 1;
                   1037:               condition = offset < offset_top &&
                   1038:                 md->offset_vector[offset] >= 0;
                   1039:               if (condition) break;
                   1040:               }
                   1041:             else break;
                   1042:             }
                   1043: 
                   1044:           /* Scan up for duplicates */
                   1045: 
                   1046:           if (!condition)
                   1047:             {
                   1048:             slotB = slotA;
                   1049:             for (i++; i < md->name_count; i++)
                   1050:               {
                   1051:               slotB += md->name_entry_size;
                   1052:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1053:                 {
                   1054:                 offset = GET2(slotB, 0) << 1;
                   1055:                 condition = offset < offset_top &&
                   1056:                   md->offset_vector[offset] >= 0;
                   1057:                 if (condition) break;
                   1058:                 }
                   1059:               else break;
                   1060:               }
                   1061:             }
                   1062:           }
                   1063:         }
                   1064: 
                   1065:       /* Choose branch according to the condition */
                   1066: 
                   1067:       ecode += condition? 3 : GET(ecode, 1);
                   1068:       }
                   1069: 
                   1070:     else if (condcode == OP_DEF)     /* DEFINE - always false */
                   1071:       {
                   1072:       condition = FALSE;
                   1073:       ecode += GET(ecode, 1);
                   1074:       }
                   1075: 
                   1076:     /* The condition is an assertion. Call match() to evaluate it - passing
                   1077:     match_condassert as the flags argument causes it to stop at the end of an
                   1078:     assertion. */
                   1079: 
                   1080:     else
                   1081:       {
                   1082:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
                   1083:           match_condassert, RM3);
                   1084:       if (rrc == MATCH_MATCH)
                   1085:         {
                   1086:         condition = TRUE;
                   1087:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1088:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1089:         }
                   1090:       else if (rrc != MATCH_NOMATCH &&
                   1091:               (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                   1092:         {
                   1093:         RRETURN(rrc);         /* Need braces because of following else */
                   1094:         }
                   1095:       else
                   1096:         {
                   1097:         condition = FALSE;
                   1098:         ecode += codelink;
                   1099:         }
                   1100:       }
                   1101: 
                   1102:     /* We are now at the branch that is to be obeyed. As there is only one,
                   1103:     we can use tail recursion to avoid using another stack frame, except when
                   1104:     match_cbegroup is required for an unlimited repeat of a possibly empty
                   1105:     group. If the second alternative doesn't exist, we can just plough on. */
                   1106: 
                   1107:     if (condition || *ecode == OP_ALT)
                   1108:       {
                   1109:       ecode += 1 + LINK_SIZE;
                   1110:       if (op == OP_SCOND)        /* Possibly empty group */
                   1111:         {
                   1112:         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
                   1113:         RRETURN(rrc);
                   1114:         }
                   1115:       else                       /* Group must match something */
                   1116:         {
                   1117:         flags = 0;
                   1118:         goto TAIL_RECURSE;
                   1119:         }
                   1120:       }
                   1121:     else                         /* Condition false & no alternative */
                   1122:       {
                   1123:       ecode += 1 + LINK_SIZE;
                   1124:       }
                   1125:     break;
                   1126: 
                   1127: 
                   1128:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1129:     to close any currently open capturing brackets. */
                   1130: 
                   1131:     case OP_CLOSE:
                   1132:     number = GET2(ecode, 1);
                   1133:     offset = number << 1;
                   1134: 
                   1135: #ifdef PCRE_DEBUG
                   1136:       printf("end bracket %d at *ACCEPT", number);
                   1137:       printf("\n");
                   1138: #endif
                   1139: 
                   1140:     md->capture_last = number;
                   1141:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1142:       {
                   1143:       md->offset_vector[offset] =
                   1144:         md->offset_vector[md->offset_end - number];
                   1145:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1146:       if (offset_top <= offset) offset_top = offset + 2;
                   1147:       }
                   1148:     ecode += 3;
                   1149:     break;
                   1150: 
                   1151: 
                   1152:     /* End of the pattern, either real or forced. If we are in a top-level
                   1153:     recursion, we should restore the offsets appropriately and continue from
                   1154:     after the call. */
                   1155: 
                   1156:     case OP_ACCEPT:
                   1157:     case OP_END:
                   1158:     if (md->recursive != NULL && md->recursive->group_num == 0)
                   1159:       {
                   1160:       recursion_info *rec = md->recursive;
                   1161:       DPRINTF(("End of pattern in a (?0) recursion\n"));
                   1162:       md->recursive = rec->prevrec;
                   1163:       memmove(md->offset_vector, rec->offset_save,
                   1164:         rec->saved_max * sizeof(int));
                   1165:       offset_top = rec->save_offset_top;
                   1166:       ims = original_ims;
                   1167:       ecode = rec->after_call;
                   1168:       break;
                   1169:       }
                   1170: 
                   1171:     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
                   1172:     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
                   1173:     the subject. In both cases, backtracking will then try other alternatives,
                   1174:     if any. */
                   1175: 
                   1176:     if (eptr == mstart &&
                   1177:         (md->notempty ||
                   1178:           (md->notempty_atstart &&
                   1179:             mstart == md->start_subject + md->start_offset)))
                   1180:       MRRETURN(MATCH_NOMATCH);
                   1181: 
                   1182:     /* Otherwise, we have a match. */
                   1183: 
                   1184:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1185:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1186:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                   1187: 
                   1188:     /* For some reason, the macros don't work properly if an expression is
                   1189:     given as the argument to MRRETURN when the heap is in use. */
                   1190: 
                   1191:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
                   1192:     MRRETURN(rrc);
                   1193: 
                   1194:     /* Change option settings */
                   1195: 
                   1196:     case OP_OPT:
                   1197:     ims = ecode[1];
                   1198:     ecode += 2;
                   1199:     DPRINTF(("ims set to %02lx\n", ims));
                   1200:     break;
                   1201: 
                   1202:     /* Assertion brackets. Check the alternative branches in turn - the
                   1203:     matching won't pass the KET for an assertion. If any one branch matches,
                   1204:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1205:     start of each branch to move the current point backwards, so the code at
                   1206:     this level is identical to the lookahead case. */
                   1207: 
                   1208:     case OP_ASSERT:
                   1209:     case OP_ASSERTBACK:
                   1210:     do
                   1211:       {
                   1212:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                   1213:         RM4);
                   1214:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1215:         {
                   1216:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1217:         break;
                   1218:         }
                   1219:       if (rrc != MATCH_NOMATCH &&
                   1220:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                   1221:         RRETURN(rrc);
                   1222:       ecode += GET(ecode, 1);
                   1223:       }
                   1224:     while (*ecode == OP_ALT);
                   1225:     if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
                   1226: 
                   1227:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1228: 
                   1229:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                   1230: 
                   1231:     /* Continue from after the assertion, updating the offsets high water
                   1232:     mark, since extracts may have been taken during the assertion. */
                   1233: 
                   1234:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1235:     ecode += 1 + LINK_SIZE;
                   1236:     offset_top = md->end_offset_top;
                   1237:     continue;
                   1238: 
                   1239:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
                   1240:     PRUNE, or COMMIT means we must assume failure without checking subsequent
                   1241:     branches. */
                   1242: 
                   1243:     case OP_ASSERT_NOT:
                   1244:     case OP_ASSERTBACK_NOT:
                   1245:     do
                   1246:       {
                   1247:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                   1248:         RM5);
                   1249:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
                   1250:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
                   1251:         {
                   1252:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1253:         break;
                   1254:         }
                   1255:       if (rrc != MATCH_NOMATCH &&
                   1256:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                   1257:         RRETURN(rrc);
                   1258:       ecode += GET(ecode,1);
                   1259:       }
                   1260:     while (*ecode == OP_ALT);
                   1261: 
                   1262:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                   1263: 
                   1264:     ecode += 1 + LINK_SIZE;
                   1265:     continue;
                   1266: 
                   1267:     /* Move the subject pointer back. This occurs only at the start of
                   1268:     each branch of a lookbehind assertion. If we are too close to the start to
                   1269:     move back, this match function fails. When working with UTF-8 we move
                   1270:     back a number of characters, not bytes. */
                   1271: 
                   1272:     case OP_REVERSE:
                   1273: #ifdef SUPPORT_UTF8
                   1274:     if (utf8)
                   1275:       {
                   1276:       i = GET(ecode, 1);
                   1277:       while (i-- > 0)
                   1278:         {
                   1279:         eptr--;
                   1280:         if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
                   1281:         BACKCHAR(eptr);
                   1282:         }
                   1283:       }
                   1284:     else
                   1285: #endif
                   1286: 
                   1287:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1288: 
                   1289:       {
                   1290:       eptr -= GET(ecode, 1);
                   1291:       if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
                   1292:       }
                   1293: 
                   1294:     /* Save the earliest consulted character, then skip to next op code */
                   1295: 
                   1296:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
                   1297:     ecode += 1 + LINK_SIZE;
                   1298:     break;
                   1299: 
                   1300:     /* The callout item calls an external function, if one is provided, passing
                   1301:     details of the match so far. This is mainly for debugging, though the
                   1302:     function is able to force a failure. */
                   1303: 
                   1304:     case OP_CALLOUT:
                   1305:     if (pcre_callout != NULL)
                   1306:       {
                   1307:       pcre_callout_block cb;
                   1308:       cb.version          = 1;   /* Version 1 of the callout block */
                   1309:       cb.callout_number   = ecode[1];
                   1310:       cb.offset_vector    = md->offset_vector;
                   1311:       cb.subject          = (PCRE_SPTR)md->start_subject;
                   1312:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1313:       cb.start_match      = (int)(mstart - md->start_subject);
                   1314:       cb.current_position = (int)(eptr - md->start_subject);
                   1315:       cb.pattern_position = GET(ecode, 2);
                   1316:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1317:       cb.capture_top      = offset_top/2;
                   1318:       cb.capture_last     = md->capture_last;
                   1319:       cb.callout_data     = md->callout_data;
                   1320:       if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
                   1321:       if (rrc < 0) RRETURN(rrc);
                   1322:       }
                   1323:     ecode += 2 + 2*LINK_SIZE;
                   1324:     break;
                   1325: 
                   1326:     /* Recursion either matches the current regex, or some subexpression. The
                   1327:     offset data is the offset to the starting bracket from the start of the
                   1328:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1329: 
                   1330:     If there are any capturing brackets started but not finished, we have to
                   1331:     save their starting points and reinstate them after the recursion. However,
                   1332:     we don't know how many such there are (offset_top records the completed
                   1333:     total) so we just have to save all the potential data. There may be up to
                   1334:     65535 such values, which is too large to put on the stack, but using malloc
                   1335:     for small numbers seems expensive. As a compromise, the stack is used when
                   1336:     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
                   1337:     is used. If the malloc fails, nothing can be saved, so the recursion is not
                   1338:     attempted and PCRE_ERROR_NOMEMORY is returned from this call of match()
                   1339:     instead.
                   1340: 
                   1341:     There are also other values that have to be saved. We use a chained
                   1342:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1343:     for the original version of this logic. */
                   1344: 
                   1345:     case OP_RECURSE:
                   1346:       {
                   1347:       callpat = md->start_code + GET(ecode, 1);
                   1348:       new_recursive.group_num = (callpat == md->start_code)? 0 :
                   1349:         GET2(callpat, 1 + LINK_SIZE);
                   1350: 
                   1351:       /* Add to "recursing stack" */
                   1352: 
                   1353:       new_recursive.prevrec = md->recursive;
                   1354:       md->recursive = &new_recursive;
                   1355: 
                   1356:       /* Find where to continue from afterwards */
                   1357: 
                   1358:       ecode += 1 + LINK_SIZE;
                   1359:       new_recursive.after_call = ecode;
                   1360: 
                   1361:       /* Now save the offset data. */
                   1362: 
                   1363:       new_recursive.saved_max = md->offset_end;
                   1364:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1365:         new_recursive.offset_save = stacksave;
                   1366:       else
                   1367:         {
                   1368:         new_recursive.offset_save =
                   1369:           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
                   1370:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1371:         }
                   1372: 
                   1373:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1374:             new_recursive.saved_max * sizeof(int));
                   1375:       new_recursive.save_offset_top = offset_top;
                   1376: 
                   1377:       /* OK, now we can do the recursion. For each top-level alternative we
                   1378:       restore the offset and recursion data. */
                   1379: 
                   1380:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1381:       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
                   1382:       do
                   1383:         {
                   1384:         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
                   1385:           md, ims, eptrb, flags, RM6);
                   1386:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1387:           {
                   1388:           DPRINTF(("Recursion matched\n"));
                   1389:           md->recursive = new_recursive.prevrec;
                   1390:           if (new_recursive.offset_save != stacksave)
                   1391:             (pcre_free)(new_recursive.offset_save);
                   1392:           MRRETURN(MATCH_MATCH);
                   1393:           }
                   1394:         else if (rrc != MATCH_NOMATCH &&
                   1395:                 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                   1396:           {
                   1397:           DPRINTF(("Recursion gave error %d\n", rrc));
                   1398:           if (new_recursive.offset_save != stacksave)
                   1399:             (pcre_free)(new_recursive.offset_save);
                   1400:           RRETURN(rrc);
                   1401:           }
                   1402: 
                   1403:         md->recursive = &new_recursive;
                   1404:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1405:             new_recursive.saved_max * sizeof(int));
                   1406:         callpat += GET(callpat, 1);
                   1407:         }
                   1408:       while (*callpat == OP_ALT);
                   1409: 
                   1410:       DPRINTF(("Recursion didn't match\n"));
                   1411:       md->recursive = new_recursive.prevrec;
                   1412:       if (new_recursive.offset_save != stacksave)
                   1413:         (pcre_free)(new_recursive.offset_save);
                   1414:       MRRETURN(MATCH_NOMATCH);
                   1415:       }
                   1416:     /* Control never reaches here */
                   1417: 
                   1418:     /* "Once" brackets are like assertion brackets except that after a match,
                   1419:     the point in the subject string is not moved back. Thus there can never be
                   1420:     a move back into the brackets. Friedl calls these "atomic" subpatterns.
                   1421:     Check the alternative branches in turn - the matching won't pass the KET
                   1422:     for this kind of subpattern. If any one branch matches, we carry on as at
                   1423:     the end of a normal bracket, leaving the subject pointer, but resetting
                   1424:     the start-of-match value in case it was changed by \K. */
                   1425: 
                   1426:     case OP_ONCE:
                   1427:     prev = ecode;
                   1428:     saved_eptr = eptr;
                   1429: 
                   1430:     do
                   1431:       {
                   1432:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
                   1433:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
                   1434:         {
                   1435:         mstart = md->start_match_ptr;
                   1436:         break;
                   1437:         }
                   1438:       if (rrc != MATCH_NOMATCH &&
                   1439:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
                   1440:         RRETURN(rrc);
                   1441:       ecode += GET(ecode,1);
                   1442:       }
                   1443:     while (*ecode == OP_ALT);
                   1444: 
                   1445:     /* If hit the end of the group (which could be repeated), fail */
                   1446: 
                   1447:     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                   1448: 
                   1449:     /* Continue as from after the assertion, updating the offsets high water
                   1450:     mark, since extracts may have been taken. */
                   1451: 
                   1452:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1453: 
                   1454:     offset_top = md->end_offset_top;
                   1455:     eptr = md->end_match_ptr;
                   1456: 
                   1457:     /* For a non-repeating ket, just continue at this level. This also
                   1458:     happens for a repeating ket if no characters were matched in the group.
                   1459:     This is the forcible breaking of infinite loops as implemented in Perl
                   1460:     5.005. If there is an options reset, it will get obeyed in the normal
                   1461:     course of events. */
                   1462: 
                   1463:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1464:       {
                   1465:       ecode += 1+LINK_SIZE;
                   1466:       break;
                   1467:       }
                   1468: 
                   1469:     /* The repeating kets try the rest of the pattern or restart from the
                   1470:     preceding bracket, in the appropriate order. The second "call" of match()
                   1471:     uses tail recursion, to avoid using another stack frame. We need to reset
                   1472:     any options that changed within the bracket before re-running it, so
                   1473:     check the next opcode. */
                   1474: 
                   1475:     if (ecode[1+LINK_SIZE] == OP_OPT)
                   1476:       {
                   1477:       ims = (ims & ~PCRE_IMS) | ecode[4];
                   1478:       DPRINTF(("ims set to %02lx at group repeat\n", ims));
                   1479:       }
                   1480: 
                   1481:     if (*ecode == OP_KETRMIN)
                   1482:       {
                   1483:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
                   1484:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1485:       ecode = prev;
                   1486:       flags = 0;
                   1487:       goto TAIL_RECURSE;
                   1488:       }
                   1489:     else  /* OP_KETRMAX */
                   1490:       {
                   1491:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
                   1492:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1493:       ecode += 1 + LINK_SIZE;
                   1494:       flags = 0;
                   1495:       goto TAIL_RECURSE;
                   1496:       }
                   1497:     /* Control never gets here */
                   1498: 
                   1499:     /* An alternation is the end of a branch; scan along to find the end of the
                   1500:     bracketed group and go to there. */
                   1501: 
                   1502:     case OP_ALT:
                   1503:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1504:     break;
                   1505: 
                   1506:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1507:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1508:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1509:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1510:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1511: 
                   1512:     case OP_BRAZERO:
                   1513:       {
                   1514:       next = ecode+1;
                   1515:       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
                   1516:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1517:       do next += GET(next,1); while (*next == OP_ALT);
                   1518:       ecode = next + 1 + LINK_SIZE;
                   1519:       }
                   1520:     break;
                   1521: 
                   1522:     case OP_BRAMINZERO:
                   1523:       {
                   1524:       next = ecode+1;
                   1525:       do next += GET(next, 1); while (*next == OP_ALT);
                   1526:       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
                   1527:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1528:       ecode++;
                   1529:       }
                   1530:     break;
                   1531: 
                   1532:     case OP_SKIPZERO:
                   1533:       {
                   1534:       next = ecode+1;
                   1535:       do next += GET(next,1); while (*next == OP_ALT);
                   1536:       ecode = next + 1 + LINK_SIZE;
                   1537:       }
                   1538:     break;
                   1539: 
                   1540:     /* End of a group, repeated or non-repeating. */
                   1541: 
                   1542:     case OP_KET:
                   1543:     case OP_KETRMIN:
                   1544:     case OP_KETRMAX:
                   1545:     prev = ecode - GET(ecode, 1);
                   1546: 
                   1547:     /* If this was a group that remembered the subject start, in order to break
                   1548:     infinite repeats of empty string matches, retrieve the subject start from
                   1549:     the chain. Otherwise, set it NULL. */
                   1550: 
                   1551:     if (*prev >= OP_SBRA)
                   1552:       {
                   1553:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1554:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1555:       }
                   1556:     else saved_eptr = NULL;
                   1557: 
                   1558:     /* If we are at the end of an assertion group or an atomic group, stop
                   1559:     matching and return MATCH_MATCH, but record the current high water mark for
                   1560:     use by positive assertions. We also need to record the match start in case
                   1561:     it was changed by \K. */
                   1562: 
                   1563:     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
                   1564:         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
                   1565:         *prev == OP_ONCE)
                   1566:       {
                   1567:       md->end_match_ptr = eptr;      /* For ONCE */
                   1568:       md->end_offset_top = offset_top;
                   1569:       md->start_match_ptr = mstart;
                   1570:       MRRETURN(MATCH_MATCH);
                   1571:       }
                   1572: 
                   1573:     /* For capturing groups we have to check the group number back at the start
                   1574:     and if necessary complete handling an extraction by setting the offsets and
                   1575:     bumping the high water mark. Note that whole-pattern recursion is coded as
                   1576:     a recurse into group 0, so it won't be picked up here. Instead, we catch it
                   1577:     when the OP_END is reached. Other recursion is handled here. */
                   1578: 
                   1579:     if (*prev == OP_CBRA || *prev == OP_SCBRA)
                   1580:       {
                   1581:       number = GET2(prev, 1+LINK_SIZE);
                   1582:       offset = number << 1;
                   1583: 
                   1584: #ifdef PCRE_DEBUG
                   1585:       printf("end bracket %d", number);
                   1586:       printf("\n");
                   1587: #endif
                   1588: 
                   1589:       md->capture_last = number;
                   1590:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1591:         {
                   1592:         md->offset_vector[offset] =
                   1593:           md->offset_vector[md->offset_end - number];
                   1594:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1595:         if (offset_top <= offset) offset_top = offset + 2;
                   1596:         }
                   1597: 
                   1598:       /* Handle a recursively called group. Restore the offsets
                   1599:       appropriately and continue from after the call. */
                   1600: 
                   1601:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1602:         {
                   1603:         recursion_info *rec = md->recursive;
                   1604:         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
                   1605:         md->recursive = rec->prevrec;
                   1606:         memcpy(md->offset_vector, rec->offset_save,
                   1607:           rec->saved_max * sizeof(int));
                   1608:         offset_top = rec->save_offset_top;
                   1609:         ecode = rec->after_call;
                   1610:         ims = original_ims;
                   1611:         break;
                   1612:         }
                   1613:       }
                   1614: 
                   1615:     /* For both capturing and non-capturing groups, reset the value of the ims
                   1616:     flags, in case they got changed during the group. */
                   1617: 
                   1618:     ims = original_ims;
                   1619:     DPRINTF(("ims reset to %02lx\n", ims));
                   1620: 
                   1621:     /* For a non-repeating ket, just continue at this level. This also
                   1622:     happens for a repeating ket if no characters were matched in the group.
                   1623:     This is the forcible breaking of infinite loops as implemented in Perl
                   1624:     5.005. If there is an options reset, it will get obeyed in the normal
                   1625:     course of events. */
                   1626: 
                   1627:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1628:       {
                   1629:       ecode += 1 + LINK_SIZE;
                   1630:       break;
                   1631:       }
                   1632: 
                   1633:     /* The repeating kets try the rest of the pattern or restart from the
                   1634:     preceding bracket, in the appropriate order. In the second case, we can use
                   1635:     tail recursion to avoid using another stack frame, unless we have an
                   1636:     unlimited repeat of a group that can match an empty string. */
                   1637: 
                   1638:     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
                   1639: 
                   1640:     if (*ecode == OP_KETRMIN)
                   1641:       {
                   1642:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
                   1643:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1644:       if (flags != 0)    /* Could match an empty string */
                   1645:         {
                   1646:         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
                   1647:         RRETURN(rrc);
                   1648:         }
                   1649:       ecode = prev;
                   1650:       goto TAIL_RECURSE;
                   1651:       }
                   1652:     else  /* OP_KETRMAX */
                   1653:       {
                   1654:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
                   1655:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1656:       ecode += 1 + LINK_SIZE;
                   1657:       flags = 0;
                   1658:       goto TAIL_RECURSE;
                   1659:       }
                   1660:     /* Control never gets here */
                   1661: 
                   1662:     /* Start of subject unless notbol, or after internal newline if multiline */
                   1663: 
                   1664:     case OP_CIRC:
                   1665:     if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
                   1666:     if ((ims & PCRE_MULTILINE) != 0)
                   1667:       {
                   1668:       if (eptr != md->start_subject &&
                   1669:           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   1670:         MRRETURN(MATCH_NOMATCH);
                   1671:       ecode++;
                   1672:       break;
                   1673:       }
                   1674:     /* ... else fall through */
                   1675: 
                   1676:     /* Start of subject assertion */
                   1677: 
                   1678:     case OP_SOD:
                   1679:     if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
                   1680:     ecode++;
                   1681:     break;
                   1682: 
                   1683:     /* Start of match assertion */
                   1684: 
                   1685:     case OP_SOM:
                   1686:     if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
                   1687:     ecode++;
                   1688:     break;
                   1689: 
                   1690:     /* Reset the start of match point */
                   1691: 
                   1692:     case OP_SET_SOM:
                   1693:     mstart = eptr;
                   1694:     ecode++;
                   1695:     break;
                   1696: 
                   1697:     /* Assert before internal newline if multiline, or before a terminating
                   1698:     newline unless endonly is set, else end of subject unless noteol is set. */
                   1699: 
                   1700:     case OP_DOLL:
                   1701:     if ((ims & PCRE_MULTILINE) != 0)
                   1702:       {
                   1703:       if (eptr < md->end_subject)
                   1704:         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
                   1705:       else
                   1706:         {
                   1707:         if (md->noteol) MRRETURN(MATCH_NOMATCH);
                   1708:         SCHECK_PARTIAL();
                   1709:         }
                   1710:       ecode++;
                   1711:       break;
                   1712:       }
                   1713:     else  /* Not multiline */
                   1714:       {
                   1715:       if (md->noteol) MRRETURN(MATCH_NOMATCH);
                   1716:       if (!md->endonly) goto ASSERT_NL_OR_EOS;
                   1717:       }
                   1718: 
                   1719:     /* ... else fall through for endonly */
                   1720: 
                   1721:     /* End of subject assertion (\z) */
                   1722: 
                   1723:     case OP_EOD:
                   1724:     if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
                   1725:     SCHECK_PARTIAL();
                   1726:     ecode++;
                   1727:     break;
                   1728: 
                   1729:     /* End of subject or ending \n assertion (\Z) */
                   1730: 
                   1731:     case OP_EODN:
                   1732:     ASSERT_NL_OR_EOS:
                   1733:     if (eptr < md->end_subject &&
                   1734:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
                   1735:       MRRETURN(MATCH_NOMATCH);
                   1736: 
                   1737:     /* Either at end of string or \n before end. */
                   1738: 
                   1739:     SCHECK_PARTIAL();
                   1740:     ecode++;
                   1741:     break;
                   1742: 
                   1743:     /* Word boundary assertions */
                   1744: 
                   1745:     case OP_NOT_WORD_BOUNDARY:
                   1746:     case OP_WORD_BOUNDARY:
                   1747:       {
                   1748: 
                   1749:       /* Find out if the previous and current characters are "word" characters.
                   1750:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   1751:       be "non-word" characters. Remember the earliest consulted character for
                   1752:       partial matching. */
                   1753: 
                   1754: #ifdef SUPPORT_UTF8
                   1755:       if (utf8)
                   1756:         {
                   1757:         /* Get status of previous character */
                   1758: 
                   1759:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1760:           {
                   1761:           USPTR lastptr = eptr - 1;
                   1762:           while((*lastptr & 0xc0) == 0x80) lastptr--;
                   1763:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
                   1764:           GETCHAR(c, lastptr);
                   1765: #ifdef SUPPORT_UCP
                   1766:           if (md->use_ucp)
                   1767:             {
                   1768:             if (c == '_') prev_is_word = TRUE; else
                   1769:               {
                   1770:               int cat = UCD_CATEGORY(c);
                   1771:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   1772:               }
                   1773:             }
                   1774:           else
                   1775: #endif
                   1776:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1777:           }
                   1778: 
                   1779:         /* Get status of next character */
                   1780: 
                   1781:         if (eptr >= md->end_subject)
                   1782:           {
                   1783:           SCHECK_PARTIAL();
                   1784:           cur_is_word = FALSE;
                   1785:           }
                   1786:         else
                   1787:           {
                   1788:           GETCHAR(c, eptr);
                   1789: #ifdef SUPPORT_UCP
                   1790:           if (md->use_ucp)
                   1791:             {
                   1792:             if (c == '_') cur_is_word = TRUE; else
                   1793:               {
                   1794:               int cat = UCD_CATEGORY(c);
                   1795:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   1796:               }
                   1797:             }
                   1798:           else
                   1799: #endif
                   1800:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1801:           }
                   1802:         }
                   1803:       else
                   1804: #endif
                   1805: 
                   1806:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   1807:       consistency with the behaviour of \w we do use it in this case. */
                   1808: 
                   1809:         {
                   1810:         /* Get status of previous character */
                   1811: 
                   1812:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1813:           {
                   1814:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   1815: #ifdef SUPPORT_UCP
                   1816:           if (md->use_ucp)
                   1817:             {
                   1818:             c = eptr[-1];
                   1819:             if (c == '_') prev_is_word = TRUE; else
                   1820:               {
                   1821:               int cat = UCD_CATEGORY(c);
                   1822:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   1823:               }
                   1824:             }
                   1825:           else
                   1826: #endif
                   1827:           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
                   1828:           }
                   1829: 
                   1830:         /* Get status of next character */
                   1831: 
                   1832:         if (eptr >= md->end_subject)
                   1833:           {
                   1834:           SCHECK_PARTIAL();
                   1835:           cur_is_word = FALSE;
                   1836:           }
                   1837:         else
                   1838: #ifdef SUPPORT_UCP
                   1839:         if (md->use_ucp)
                   1840:           {
                   1841:           c = *eptr;
                   1842:           if (c == '_') cur_is_word = TRUE; else
                   1843:             {
                   1844:             int cat = UCD_CATEGORY(c);
                   1845:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   1846:             }
                   1847:           }
                   1848:         else
                   1849: #endif
                   1850:         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
                   1851:         }
                   1852: 
                   1853:       /* Now see if the situation is what we want */
                   1854: 
                   1855:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   1856:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
                   1857:         MRRETURN(MATCH_NOMATCH);
                   1858:       }
                   1859:     break;
                   1860: 
                   1861:     /* Match a single character type; inline for speed */
                   1862: 
                   1863:     case OP_ANY:
                   1864:     if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
                   1865:     /* Fall through */
                   1866: 
                   1867:     case OP_ALLANY:
                   1868:     if (eptr++ >= md->end_subject)
                   1869:       {
                   1870:       SCHECK_PARTIAL();
                   1871:       MRRETURN(MATCH_NOMATCH);
                   1872:       }
                   1873:     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   1874:     ecode++;
                   1875:     break;
                   1876: 
                   1877:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   1878:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   1879: 
                   1880:     case OP_ANYBYTE:
                   1881:     if (eptr++ >= md->end_subject)
                   1882:       {
                   1883:       SCHECK_PARTIAL();
                   1884:       MRRETURN(MATCH_NOMATCH);
                   1885:       }
                   1886:     ecode++;
                   1887:     break;
                   1888: 
                   1889:     case OP_NOT_DIGIT:
                   1890:     if (eptr >= md->end_subject)
                   1891:       {
                   1892:       SCHECK_PARTIAL();
                   1893:       MRRETURN(MATCH_NOMATCH);
                   1894:       }
                   1895:     GETCHARINCTEST(c, eptr);
                   1896:     if (
                   1897: #ifdef SUPPORT_UTF8
                   1898:        c < 256 &&
                   1899: #endif
                   1900:        (md->ctypes[c] & ctype_digit) != 0
                   1901:        )
                   1902:       MRRETURN(MATCH_NOMATCH);
                   1903:     ecode++;
                   1904:     break;
                   1905: 
                   1906:     case OP_DIGIT:
                   1907:     if (eptr >= md->end_subject)
                   1908:       {
                   1909:       SCHECK_PARTIAL();
                   1910:       MRRETURN(MATCH_NOMATCH);
                   1911:       }
                   1912:     GETCHARINCTEST(c, eptr);
                   1913:     if (
                   1914: #ifdef SUPPORT_UTF8
                   1915:        c >= 256 ||
                   1916: #endif
                   1917:        (md->ctypes[c] & ctype_digit) == 0
                   1918:        )
                   1919:       MRRETURN(MATCH_NOMATCH);
                   1920:     ecode++;
                   1921:     break;
                   1922: 
                   1923:     case OP_NOT_WHITESPACE:
                   1924:     if (eptr >= md->end_subject)
                   1925:       {
                   1926:       SCHECK_PARTIAL();
                   1927:       MRRETURN(MATCH_NOMATCH);
                   1928:       }
                   1929:     GETCHARINCTEST(c, eptr);
                   1930:     if (
                   1931: #ifdef SUPPORT_UTF8
                   1932:        c < 256 &&
                   1933: #endif
                   1934:        (md->ctypes[c] & ctype_space) != 0
                   1935:        )
                   1936:       MRRETURN(MATCH_NOMATCH);
                   1937:     ecode++;
                   1938:     break;
                   1939: 
                   1940:     case OP_WHITESPACE:
                   1941:     if (eptr >= md->end_subject)
                   1942:       {
                   1943:       SCHECK_PARTIAL();
                   1944:       MRRETURN(MATCH_NOMATCH);
                   1945:       }
                   1946:     GETCHARINCTEST(c, eptr);
                   1947:     if (
                   1948: #ifdef SUPPORT_UTF8
                   1949:        c >= 256 ||
                   1950: #endif
                   1951:        (md->ctypes[c] & ctype_space) == 0
                   1952:        )
                   1953:       MRRETURN(MATCH_NOMATCH);
                   1954:     ecode++;
                   1955:     break;
                   1956: 
                   1957:     case OP_NOT_WORDCHAR:
                   1958:     if (eptr >= md->end_subject)
                   1959:       {
                   1960:       SCHECK_PARTIAL();
                   1961:       MRRETURN(MATCH_NOMATCH);
                   1962:       }
                   1963:     GETCHARINCTEST(c, eptr);
                   1964:     if (
                   1965: #ifdef SUPPORT_UTF8
                   1966:        c < 256 &&
                   1967: #endif
                   1968:        (md->ctypes[c] & ctype_word) != 0
                   1969:        )
                   1970:       MRRETURN(MATCH_NOMATCH);
                   1971:     ecode++;
                   1972:     break;
                   1973: 
                   1974:     case OP_WORDCHAR:
                   1975:     if (eptr >= md->end_subject)
                   1976:       {
                   1977:       SCHECK_PARTIAL();
                   1978:       MRRETURN(MATCH_NOMATCH);
                   1979:       }
                   1980:     GETCHARINCTEST(c, eptr);
                   1981:     if (
                   1982: #ifdef SUPPORT_UTF8
                   1983:        c >= 256 ||
                   1984: #endif
                   1985:        (md->ctypes[c] & ctype_word) == 0
                   1986:        )
                   1987:       MRRETURN(MATCH_NOMATCH);
                   1988:     ecode++;
                   1989:     break;
                   1990: 
                   1991:     case OP_ANYNL:
                   1992:     if (eptr >= md->end_subject)
                   1993:       {
                   1994:       SCHECK_PARTIAL();
                   1995:       MRRETURN(MATCH_NOMATCH);
                   1996:       }
                   1997:     GETCHARINCTEST(c, eptr);
                   1998:     switch(c)
                   1999:       {
                   2000:       default: MRRETURN(MATCH_NOMATCH);
                   2001:       case 0x000d:
                   2002:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   2003:       break;
                   2004: 
                   2005:       case 0x000a:
                   2006:       break;
                   2007: 
                   2008:       case 0x000b:
                   2009:       case 0x000c:
                   2010:       case 0x0085:
                   2011:       case 0x2028:
                   2012:       case 0x2029:
                   2013:       if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
                   2014:       break;
                   2015:       }
                   2016:     ecode++;
                   2017:     break;
                   2018: 
                   2019:     case OP_NOT_HSPACE:
                   2020:     if (eptr >= md->end_subject)
                   2021:       {
                   2022:       SCHECK_PARTIAL();
                   2023:       MRRETURN(MATCH_NOMATCH);
                   2024:       }
                   2025:     GETCHARINCTEST(c, eptr);
                   2026:     switch(c)
                   2027:       {
                   2028:       default: break;
                   2029:       case 0x09:      /* HT */
                   2030:       case 0x20:      /* SPACE */
                   2031:       case 0xa0:      /* NBSP */
                   2032:       case 0x1680:    /* OGHAM SPACE MARK */
                   2033:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2034:       case 0x2000:    /* EN QUAD */
                   2035:       case 0x2001:    /* EM QUAD */
                   2036:       case 0x2002:    /* EN SPACE */
                   2037:       case 0x2003:    /* EM SPACE */
                   2038:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2039:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2040:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2041:       case 0x2007:    /* FIGURE SPACE */
                   2042:       case 0x2008:    /* PUNCTUATION SPACE */
                   2043:       case 0x2009:    /* THIN SPACE */
                   2044:       case 0x200A:    /* HAIR SPACE */
                   2045:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2046:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2047:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2048:       MRRETURN(MATCH_NOMATCH);
                   2049:       }
                   2050:     ecode++;
                   2051:     break;
                   2052: 
                   2053:     case OP_HSPACE:
                   2054:     if (eptr >= md->end_subject)
                   2055:       {
                   2056:       SCHECK_PARTIAL();
                   2057:       MRRETURN(MATCH_NOMATCH);
                   2058:       }
                   2059:     GETCHARINCTEST(c, eptr);
                   2060:     switch(c)
                   2061:       {
                   2062:       default: MRRETURN(MATCH_NOMATCH);
                   2063:       case 0x09:      /* HT */
                   2064:       case 0x20:      /* SPACE */
                   2065:       case 0xa0:      /* NBSP */
                   2066:       case 0x1680:    /* OGHAM SPACE MARK */
                   2067:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2068:       case 0x2000:    /* EN QUAD */
                   2069:       case 0x2001:    /* EM QUAD */
                   2070:       case 0x2002:    /* EN SPACE */
                   2071:       case 0x2003:    /* EM SPACE */
                   2072:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2073:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2074:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2075:       case 0x2007:    /* FIGURE SPACE */
                   2076:       case 0x2008:    /* PUNCTUATION SPACE */
                   2077:       case 0x2009:    /* THIN SPACE */
                   2078:       case 0x200A:    /* HAIR SPACE */
                   2079:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2080:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2081:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2082:       break;
                   2083:       }
                   2084:     ecode++;
                   2085:     break;
                   2086: 
                   2087:     case OP_NOT_VSPACE:
                   2088:     if (eptr >= md->end_subject)
                   2089:       {
                   2090:       SCHECK_PARTIAL();
                   2091:       MRRETURN(MATCH_NOMATCH);
                   2092:       }
                   2093:     GETCHARINCTEST(c, eptr);
                   2094:     switch(c)
                   2095:       {
                   2096:       default: break;
                   2097:       case 0x0a:      /* LF */
                   2098:       case 0x0b:      /* VT */
                   2099:       case 0x0c:      /* FF */
                   2100:       case 0x0d:      /* CR */
                   2101:       case 0x85:      /* NEL */
                   2102:       case 0x2028:    /* LINE SEPARATOR */
                   2103:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2104:       MRRETURN(MATCH_NOMATCH);
                   2105:       }
                   2106:     ecode++;
                   2107:     break;
                   2108: 
                   2109:     case OP_VSPACE:
                   2110:     if (eptr >= md->end_subject)
                   2111:       {
                   2112:       SCHECK_PARTIAL();
                   2113:       MRRETURN(MATCH_NOMATCH);
                   2114:       }
                   2115:     GETCHARINCTEST(c, eptr);
                   2116:     switch(c)
                   2117:       {
                   2118:       default: MRRETURN(MATCH_NOMATCH);
                   2119:       case 0x0a:      /* LF */
                   2120:       case 0x0b:      /* VT */
                   2121:       case 0x0c:      /* FF */
                   2122:       case 0x0d:      /* CR */
                   2123:       case 0x85:      /* NEL */
                   2124:       case 0x2028:    /* LINE SEPARATOR */
                   2125:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2126:       break;
                   2127:       }
                   2128:     ecode++;
                   2129:     break;
                   2130: 
                   2131: #ifdef SUPPORT_UCP
                   2132:     /* Check the next character by Unicode property. We will get here only
                   2133:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2134: 
                   2135:     case OP_PROP:
                   2136:     case OP_NOTPROP:
                   2137:     if (eptr >= md->end_subject)
                   2138:       {
                   2139:       SCHECK_PARTIAL();
                   2140:       MRRETURN(MATCH_NOMATCH);
                   2141:       }
                   2142:     GETCHARINCTEST(c, eptr);
                   2143:       {
                   2144:       const ucd_record *prop = GET_UCD(c);
                   2145: 
                   2146:       switch(ecode[1])
                   2147:         {
                   2148:         case PT_ANY:
                   2149:         if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
                   2150:         break;
                   2151: 
                   2152:         case PT_LAMP:
                   2153:         if ((prop->chartype == ucp_Lu ||
                   2154:              prop->chartype == ucp_Ll ||
                   2155:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
                   2156:           MRRETURN(MATCH_NOMATCH);
                   2157:         break;
                   2158: 
                   2159:         case PT_GC:
                   2160:         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
                   2161:           MRRETURN(MATCH_NOMATCH);
                   2162:         break;
                   2163: 
                   2164:         case PT_PC:
                   2165:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
                   2166:           MRRETURN(MATCH_NOMATCH);
                   2167:         break;
                   2168: 
                   2169:         case PT_SC:
                   2170:         if ((ecode[2] != prop->script) == (op == OP_PROP))
                   2171:           MRRETURN(MATCH_NOMATCH);
                   2172:         break;
                   2173: 
                   2174:         /* These are specials */
                   2175: 
                   2176:         case PT_ALNUM:
                   2177:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   2178:              _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
                   2179:           MRRETURN(MATCH_NOMATCH);
                   2180:         break;
                   2181: 
                   2182:         case PT_SPACE:    /* Perl space */
                   2183:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   2184:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2185:                == (op == OP_NOTPROP))
                   2186:           MRRETURN(MATCH_NOMATCH);
                   2187:         break;
                   2188: 
                   2189:         case PT_PXSPACE:  /* POSIX space */
                   2190:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   2191:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2192:              c == CHAR_FF || c == CHAR_CR)
                   2193:                == (op == OP_NOTPROP))
                   2194:           MRRETURN(MATCH_NOMATCH);
                   2195:         break;
                   2196: 
                   2197:         case PT_WORD:
                   2198:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   2199:              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   2200:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
                   2201:           MRRETURN(MATCH_NOMATCH);
                   2202:         break;
                   2203: 
                   2204:         /* This should never occur */
                   2205: 
                   2206:         default:
                   2207:         RRETURN(PCRE_ERROR_INTERNAL);
                   2208:         }
                   2209: 
                   2210:       ecode += 3;
                   2211:       }
                   2212:     break;
                   2213: 
                   2214:     /* Match an extended Unicode sequence. We will get here only if the support
                   2215:     is in the binary; otherwise a compile-time error occurs. */
                   2216: 
                   2217:     case OP_EXTUNI:
                   2218:     if (eptr >= md->end_subject)
                   2219:       {
                   2220:       SCHECK_PARTIAL();
                   2221:       MRRETURN(MATCH_NOMATCH);
                   2222:       }
                   2223:     GETCHARINCTEST(c, eptr);
                   2224:       {
                   2225:       int category = UCD_CATEGORY(c);
                   2226:       if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
                   2227:       while (eptr < md->end_subject)
                   2228:         {
                   2229:         int len = 1;
                   2230:         if (!utf8) c = *eptr; else
                   2231:           {
                   2232:           GETCHARLEN(c, eptr, len);
                   2233:           }
                   2234:         category = UCD_CATEGORY(c);
                   2235:         if (category != ucp_M) break;
                   2236:         eptr += len;
                   2237:         }
                   2238:       }
                   2239:     ecode++;
                   2240:     break;
                   2241: #endif
                   2242: 
                   2243: 
                   2244:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2245:     item to see if there is repeat information following. The code is similar
                   2246:     to that for character classes, but repeated for efficiency. Then obey
                   2247:     similar code to character type repeats - written out again for speed.
                   2248:     However, if the referenced string is the empty string, always treat
                   2249:     it as matched, any number of times (otherwise there could be infinite
                   2250:     loops). */
                   2251: 
                   2252:     case OP_REF:
                   2253:       {
                   2254:       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   2255:       ecode += 3;
                   2256: 
                   2257:       /* If the reference is unset, there are two possibilities:
                   2258: 
                   2259:       (a) In the default, Perl-compatible state, set the length to be longer
                   2260:       than the amount of subject left; this ensures that every attempt at a
                   2261:       match fails. We can't just fail here, because of the possibility of
                   2262:       quantifiers with zero minima.
                   2263: 
                   2264:       (b) If the JavaScript compatibility flag is set, set the length to zero
                   2265:       so that the back reference matches an empty string.
                   2266: 
                   2267:       Otherwise, set the length to the length of what was matched by the
                   2268:       referenced subpattern. */
                   2269: 
                   2270:       if (offset >= offset_top || md->offset_vector[offset] < 0)
                   2271:         length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
                   2272:       else
                   2273:         length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2274: 
                   2275:       /* Set up for repetition, or handle the non-repeated case */
                   2276: 
                   2277:       switch (*ecode)
                   2278:         {
                   2279:         case OP_CRSTAR:
                   2280:         case OP_CRMINSTAR:
                   2281:         case OP_CRPLUS:
                   2282:         case OP_CRMINPLUS:
                   2283:         case OP_CRQUERY:
                   2284:         case OP_CRMINQUERY:
                   2285:         c = *ecode++ - OP_CRSTAR;
                   2286:         minimize = (c & 1) != 0;
                   2287:         min = rep_min[c];                 /* Pick up values from tables; */
                   2288:         max = rep_max[c];                 /* zero for max => infinity */
                   2289:         if (max == 0) max = INT_MAX;
                   2290:         break;
                   2291: 
                   2292:         case OP_CRRANGE:
                   2293:         case OP_CRMINRANGE:
                   2294:         minimize = (*ecode == OP_CRMINRANGE);
                   2295:         min = GET2(ecode, 1);
                   2296:         max = GET2(ecode, 3);
                   2297:         if (max == 0) max = INT_MAX;
                   2298:         ecode += 5;
                   2299:         break;
                   2300: 
                   2301:         default:               /* No repeat follows */
                   2302:         if (!match_ref(offset, eptr, length, md, ims))
                   2303:           {
                   2304:           CHECK_PARTIAL();
                   2305:           MRRETURN(MATCH_NOMATCH);
                   2306:           }
                   2307:         eptr += length;
                   2308:         continue;              /* With the main loop */
                   2309:         }
                   2310: 
                   2311:       /* If the length of the reference is zero, just continue with the
                   2312:       main loop. */
                   2313: 
                   2314:       if (length == 0) continue;
                   2315: 
                   2316:       /* First, ensure the minimum number of matches are present. We get back
                   2317:       the length of the reference string explicitly rather than passing the
                   2318:       address of eptr, so that eptr can be a register variable. */
                   2319: 
                   2320:       for (i = 1; i <= min; i++)
                   2321:         {
                   2322:         if (!match_ref(offset, eptr, length, md, ims))
                   2323:           {
                   2324:           CHECK_PARTIAL();
                   2325:           MRRETURN(MATCH_NOMATCH);
                   2326:           }
                   2327:         eptr += length;
                   2328:         }
                   2329: 
                   2330:       /* If min = max, continue at the same level without recursion.
                   2331:       They are not both allowed to be zero. */
                   2332: 
                   2333:       if (min == max) continue;
                   2334: 
                   2335:       /* If minimizing, keep trying and advancing the pointer */
                   2336: 
                   2337:       if (minimize)
                   2338:         {
                   2339:         for (fi = min;; fi++)
                   2340:           {
                   2341:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
                   2342:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2343:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2344:           if (!match_ref(offset, eptr, length, md, ims))
                   2345:             {
                   2346:             CHECK_PARTIAL();
                   2347:             MRRETURN(MATCH_NOMATCH);
                   2348:             }
                   2349:           eptr += length;
                   2350:           }
                   2351:         /* Control never gets here */
                   2352:         }
                   2353: 
                   2354:       /* If maximizing, find the longest string and work backwards */
                   2355: 
                   2356:       else
                   2357:         {
                   2358:         pp = eptr;
                   2359:         for (i = min; i < max; i++)
                   2360:           {
                   2361:           if (!match_ref(offset, eptr, length, md, ims))
                   2362:             {
                   2363:             CHECK_PARTIAL();
                   2364:             break;
                   2365:             }
                   2366:           eptr += length;
                   2367:           }
                   2368:         while (eptr >= pp)
                   2369:           {
                   2370:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
                   2371:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2372:           eptr -= length;
                   2373:           }
                   2374:         MRRETURN(MATCH_NOMATCH);
                   2375:         }
                   2376:       }
                   2377:     /* Control never gets here */
                   2378: 
                   2379:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2380:     used when all the characters in the class have values in the range 0-255,
                   2381:     and either the matching is caseful, or the characters are in the range
                   2382:     0-127 when UTF-8 processing is enabled. The only difference between
                   2383:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2384:     encountered.
                   2385: 
                   2386:     First, look past the end of the item to see if there is repeat information
                   2387:     following. Then obey similar code to character type repeats - written out
                   2388:     again for speed. */
                   2389: 
                   2390:     case OP_NCLASS:
                   2391:     case OP_CLASS:
                   2392:       {
                   2393:       data = ecode + 1;                /* Save for matching */
                   2394:       ecode += 33;                     /* Advance past the item */
                   2395: 
                   2396:       switch (*ecode)
                   2397:         {
                   2398:         case OP_CRSTAR:
                   2399:         case OP_CRMINSTAR:
                   2400:         case OP_CRPLUS:
                   2401:         case OP_CRMINPLUS:
                   2402:         case OP_CRQUERY:
                   2403:         case OP_CRMINQUERY:
                   2404:         c = *ecode++ - OP_CRSTAR;
                   2405:         minimize = (c & 1) != 0;
                   2406:         min = rep_min[c];                 /* Pick up values from tables; */
                   2407:         max = rep_max[c];                 /* zero for max => infinity */
                   2408:         if (max == 0) max = INT_MAX;
                   2409:         break;
                   2410: 
                   2411:         case OP_CRRANGE:
                   2412:         case OP_CRMINRANGE:
                   2413:         minimize = (*ecode == OP_CRMINRANGE);
                   2414:         min = GET2(ecode, 1);
                   2415:         max = GET2(ecode, 3);
                   2416:         if (max == 0) max = INT_MAX;
                   2417:         ecode += 5;
                   2418:         break;
                   2419: 
                   2420:         default:               /* No repeat follows */
                   2421:         min = max = 1;
                   2422:         break;
                   2423:         }
                   2424: 
                   2425:       /* First, ensure the minimum number of matches are present. */
                   2426: 
                   2427: #ifdef SUPPORT_UTF8
                   2428:       /* UTF-8 mode */
                   2429:       if (utf8)
                   2430:         {
                   2431:         for (i = 1; i <= min; i++)
                   2432:           {
                   2433:           if (eptr >= md->end_subject)
                   2434:             {
                   2435:             SCHECK_PARTIAL();
                   2436:             MRRETURN(MATCH_NOMATCH);
                   2437:             }
                   2438:           GETCHARINC(c, eptr);
                   2439:           if (c > 255)
                   2440:             {
                   2441:             if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
                   2442:             }
                   2443:           else
                   2444:             {
                   2445:             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
                   2446:             }
                   2447:           }
                   2448:         }
                   2449:       else
                   2450: #endif
                   2451:       /* Not UTF-8 mode */
                   2452:         {
                   2453:         for (i = 1; i <= min; i++)
                   2454:           {
                   2455:           if (eptr >= md->end_subject)
                   2456:             {
                   2457:             SCHECK_PARTIAL();
                   2458:             MRRETURN(MATCH_NOMATCH);
                   2459:             }
                   2460:           c = *eptr++;
                   2461:           if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
                   2462:           }
                   2463:         }
                   2464: 
                   2465:       /* If max == min we can continue with the main loop without the
                   2466:       need to recurse. */
                   2467: 
                   2468:       if (min == max) continue;
                   2469: 
                   2470:       /* If minimizing, keep testing the rest of the expression and advancing
                   2471:       the pointer while it matches the class. */
                   2472: 
                   2473:       if (minimize)
                   2474:         {
                   2475: #ifdef SUPPORT_UTF8
                   2476:         /* UTF-8 mode */
                   2477:         if (utf8)
                   2478:           {
                   2479:           for (fi = min;; fi++)
                   2480:             {
                   2481:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
                   2482:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2483:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2484:             if (eptr >= md->end_subject)
                   2485:               {
                   2486:               SCHECK_PARTIAL();
                   2487:               MRRETURN(MATCH_NOMATCH);
                   2488:               }
                   2489:             GETCHARINC(c, eptr);
                   2490:             if (c > 255)
                   2491:               {
                   2492:               if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
                   2493:               }
                   2494:             else
                   2495:               {
                   2496:               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
                   2497:               }
                   2498:             }
                   2499:           }
                   2500:         else
                   2501: #endif
                   2502:         /* Not UTF-8 mode */
                   2503:           {
                   2504:           for (fi = min;; fi++)
                   2505:             {
                   2506:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
                   2507:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2508:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2509:             if (eptr >= md->end_subject)
                   2510:               {
                   2511:               SCHECK_PARTIAL();
                   2512:               MRRETURN(MATCH_NOMATCH);
                   2513:               }
                   2514:             c = *eptr++;
                   2515:             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
                   2516:             }
                   2517:           }
                   2518:         /* Control never gets here */
                   2519:         }
                   2520: 
                   2521:       /* If maximizing, find the longest possible run, then work backwards. */
                   2522: 
                   2523:       else
                   2524:         {
                   2525:         pp = eptr;
                   2526: 
                   2527: #ifdef SUPPORT_UTF8
                   2528:         /* UTF-8 mode */
                   2529:         if (utf8)
                   2530:           {
                   2531:           for (i = min; i < max; i++)
                   2532:             {
                   2533:             int len = 1;
                   2534:             if (eptr >= md->end_subject)
                   2535:               {
                   2536:               SCHECK_PARTIAL();
                   2537:               break;
                   2538:               }
                   2539:             GETCHARLEN(c, eptr, len);
                   2540:             if (c > 255)
                   2541:               {
                   2542:               if (op == OP_CLASS) break;
                   2543:               }
                   2544:             else
                   2545:               {
                   2546:               if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2547:               }
                   2548:             eptr += len;
                   2549:             }
                   2550:           for (;;)
                   2551:             {
                   2552:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
                   2553:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2554:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2555:             BACKCHAR(eptr);
                   2556:             }
                   2557:           }
                   2558:         else
                   2559: #endif
                   2560:           /* Not UTF-8 mode */
                   2561:           {
                   2562:           for (i = min; i < max; i++)
                   2563:             {
                   2564:             if (eptr >= md->end_subject)
                   2565:               {
                   2566:               SCHECK_PARTIAL();
                   2567:               break;
                   2568:               }
                   2569:             c = *eptr;
                   2570:             if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2571:             eptr++;
                   2572:             }
                   2573:           while (eptr >= pp)
                   2574:             {
                   2575:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
                   2576:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2577:             eptr--;
                   2578:             }
                   2579:           }
                   2580: 
                   2581:         MRRETURN(MATCH_NOMATCH);
                   2582:         }
                   2583:       }
                   2584:     /* Control never gets here */
                   2585: 
                   2586: 
                   2587:     /* Match an extended character class. This opcode is encountered only
                   2588:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   2589:     mode, because Unicode properties are supported in non-UTF-8 mode. */
                   2590: 
                   2591: #ifdef SUPPORT_UTF8
                   2592:     case OP_XCLASS:
                   2593:       {
                   2594:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2595:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2596: 
                   2597:       switch (*ecode)
                   2598:         {
                   2599:         case OP_CRSTAR:
                   2600:         case OP_CRMINSTAR:
                   2601:         case OP_CRPLUS:
                   2602:         case OP_CRMINPLUS:
                   2603:         case OP_CRQUERY:
                   2604:         case OP_CRMINQUERY:
                   2605:         c = *ecode++ - OP_CRSTAR;
                   2606:         minimize = (c & 1) != 0;
                   2607:         min = rep_min[c];                 /* Pick up values from tables; */
                   2608:         max = rep_max[c];                 /* zero for max => infinity */
                   2609:         if (max == 0) max = INT_MAX;
                   2610:         break;
                   2611: 
                   2612:         case OP_CRRANGE:
                   2613:         case OP_CRMINRANGE:
                   2614:         minimize = (*ecode == OP_CRMINRANGE);
                   2615:         min = GET2(ecode, 1);
                   2616:         max = GET2(ecode, 3);
                   2617:         if (max == 0) max = INT_MAX;
                   2618:         ecode += 5;
                   2619:         break;
                   2620: 
                   2621:         default:               /* No repeat follows */
                   2622:         min = max = 1;
                   2623:         break;
                   2624:         }
                   2625: 
                   2626:       /* First, ensure the minimum number of matches are present. */
                   2627: 
                   2628:       for (i = 1; i <= min; i++)
                   2629:         {
                   2630:         if (eptr >= md->end_subject)
                   2631:           {
                   2632:           SCHECK_PARTIAL();
                   2633:           MRRETURN(MATCH_NOMATCH);
                   2634:           }
                   2635:         GETCHARINCTEST(c, eptr);
                   2636:         if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
                   2637:         }
                   2638: 
                   2639:       /* If max == min we can continue with the main loop without the
                   2640:       need to recurse. */
                   2641: 
                   2642:       if (min == max) continue;
                   2643: 
                   2644:       /* If minimizing, keep testing the rest of the expression and advancing
                   2645:       the pointer while it matches the class. */
                   2646: 
                   2647:       if (minimize)
                   2648:         {
                   2649:         for (fi = min;; fi++)
                   2650:           {
                   2651:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
                   2652:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2653:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2654:           if (eptr >= md->end_subject)
                   2655:             {
                   2656:             SCHECK_PARTIAL();
                   2657:             MRRETURN(MATCH_NOMATCH);
                   2658:             }
                   2659:           GETCHARINCTEST(c, eptr);
                   2660:           if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
                   2661:           }
                   2662:         /* Control never gets here */
                   2663:         }
                   2664: 
                   2665:       /* If maximizing, find the longest possible run, then work backwards. */
                   2666: 
                   2667:       else
                   2668:         {
                   2669:         pp = eptr;
                   2670:         for (i = min; i < max; i++)
                   2671:           {
                   2672:           int len = 1;
                   2673:           if (eptr >= md->end_subject)
                   2674:             {
                   2675:             SCHECK_PARTIAL();
                   2676:             break;
                   2677:             }
                   2678:           GETCHARLENTEST(c, eptr, len);
                   2679:           if (!_pcre_xclass(c, data)) break;
                   2680:           eptr += len;
                   2681:           }
                   2682:         for(;;)
                   2683:           {
                   2684:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
                   2685:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2686:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2687:           if (utf8) BACKCHAR(eptr);
                   2688:           }
                   2689:         MRRETURN(MATCH_NOMATCH);
                   2690:         }
                   2691: 
                   2692:       /* Control never gets here */
                   2693:       }
                   2694: #endif    /* End of XCLASS */
                   2695: 
                   2696:     /* Match a single character, casefully */
                   2697: 
                   2698:     case OP_CHAR:
                   2699: #ifdef SUPPORT_UTF8
                   2700:     if (utf8)
                   2701:       {
                   2702:       length = 1;
                   2703:       ecode++;
                   2704:       GETCHARLEN(fc, ecode, length);
                   2705:       if (length > md->end_subject - eptr)
                   2706:         {
                   2707:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   2708:         MRRETURN(MATCH_NOMATCH);
                   2709:         }
                   2710:       while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
                   2711:       }
                   2712:     else
                   2713: #endif
                   2714: 
                   2715:     /* Non-UTF-8 mode */
                   2716:       {
                   2717:       if (md->end_subject - eptr < 1)
                   2718:         {
                   2719:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   2720:         MRRETURN(MATCH_NOMATCH);
                   2721:         }
                   2722:       if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
                   2723:       ecode += 2;
                   2724:       }
                   2725:     break;
                   2726: 
                   2727:     /* Match a single character, caselessly */
                   2728: 
                   2729:     case OP_CHARNC:
                   2730: #ifdef SUPPORT_UTF8
                   2731:     if (utf8)
                   2732:       {
                   2733:       length = 1;
                   2734:       ecode++;
                   2735:       GETCHARLEN(fc, ecode, length);
                   2736: 
                   2737:       if (length > md->end_subject - eptr)
                   2738:         {
                   2739:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   2740:         MRRETURN(MATCH_NOMATCH);
                   2741:         }
                   2742: 
                   2743:       /* If the pattern character's value is < 128, we have only one byte, and
                   2744:       can use the fast lookup table. */
                   2745: 
                   2746:       if (fc < 128)
                   2747:         {
                   2748:         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   2749:         }
                   2750: 
                   2751:       /* Otherwise we must pick up the subject character */
                   2752: 
                   2753:       else
                   2754:         {
                   2755:         unsigned int dc;
                   2756:         GETCHARINC(dc, eptr);
                   2757:         ecode += length;
                   2758: 
                   2759:         /* If we have Unicode property support, we can use it to test the other
                   2760:         case of the character, if there is one. */
                   2761: 
                   2762:         if (fc != dc)
                   2763:           {
                   2764: #ifdef SUPPORT_UCP
                   2765:           if (dc != UCD_OTHERCASE(fc))
                   2766: #endif
                   2767:             MRRETURN(MATCH_NOMATCH);
                   2768:           }
                   2769:         }
                   2770:       }
                   2771:     else
                   2772: #endif   /* SUPPORT_UTF8 */
                   2773: 
                   2774:     /* Non-UTF-8 mode */
                   2775:       {
                   2776:       if (md->end_subject - eptr < 1)
                   2777:         {
                   2778:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   2779:         MRRETURN(MATCH_NOMATCH);
                   2780:         }
                   2781:       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   2782:       ecode += 2;
                   2783:       }
                   2784:     break;
                   2785: 
                   2786:     /* Match a single character repeatedly. */
                   2787: 
                   2788:     case OP_EXACT:
                   2789:     min = max = GET2(ecode, 1);
                   2790:     ecode += 3;
                   2791:     goto REPEATCHAR;
                   2792: 
                   2793:     case OP_POSUPTO:
                   2794:     possessive = TRUE;
                   2795:     /* Fall through */
                   2796: 
                   2797:     case OP_UPTO:
                   2798:     case OP_MINUPTO:
                   2799:     min = 0;
                   2800:     max = GET2(ecode, 1);
                   2801:     minimize = *ecode == OP_MINUPTO;
                   2802:     ecode += 3;
                   2803:     goto REPEATCHAR;
                   2804: 
                   2805:     case OP_POSSTAR:
                   2806:     possessive = TRUE;
                   2807:     min = 0;
                   2808:     max = INT_MAX;
                   2809:     ecode++;
                   2810:     goto REPEATCHAR;
                   2811: 
                   2812:     case OP_POSPLUS:
                   2813:     possessive = TRUE;
                   2814:     min = 1;
                   2815:     max = INT_MAX;
                   2816:     ecode++;
                   2817:     goto REPEATCHAR;
                   2818: 
                   2819:     case OP_POSQUERY:
                   2820:     possessive = TRUE;
                   2821:     min = 0;
                   2822:     max = 1;
                   2823:     ecode++;
                   2824:     goto REPEATCHAR;
                   2825: 
                   2826:     case OP_STAR:
                   2827:     case OP_MINSTAR:
                   2828:     case OP_PLUS:
                   2829:     case OP_MINPLUS:
                   2830:     case OP_QUERY:
                   2831:     case OP_MINQUERY:
                   2832:     c = *ecode++ - OP_STAR;
                   2833:     minimize = (c & 1) != 0;
                   2834: 
                   2835:     min = rep_min[c];                 /* Pick up values from tables; */
                   2836:     max = rep_max[c];                 /* zero for max => infinity */
                   2837:     if (max == 0) max = INT_MAX;
                   2838: 
                   2839:     /* Common code for all repeated single-character matches. */
                   2840: 
                   2841:     REPEATCHAR:
                   2842: #ifdef SUPPORT_UTF8
                   2843:     if (utf8)
                   2844:       {
                   2845:       length = 1;
                   2846:       charptr = ecode;
                   2847:       GETCHARLEN(fc, ecode, length);
                   2848:       ecode += length;
                   2849: 
                   2850:       /* Handle multibyte character matching specially here. There is
                   2851:       support for caseless matching if UCP support is present. */
                   2852: 
                   2853:       if (length > 1)
                   2854:         {
                   2855: #ifdef SUPPORT_UCP
                   2856:         unsigned int othercase;
                   2857:         if ((ims & PCRE_CASELESS) != 0 &&
                   2858:             (othercase = UCD_OTHERCASE(fc)) != fc)
                   2859:           oclength = _pcre_ord2utf8(othercase, occhars);
                   2860:         else oclength = 0;
                   2861: #endif  /* SUPPORT_UCP */
                   2862: 
                   2863:         for (i = 1; i <= min; i++)
                   2864:           {
                   2865:           if (eptr <= md->end_subject - length &&
                   2866:             memcmp(eptr, charptr, length) == 0) eptr += length;
                   2867: #ifdef SUPPORT_UCP
                   2868:           else if (oclength > 0 &&
                   2869:                    eptr <= md->end_subject - oclength &&
                   2870:                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   2871: #endif  /* SUPPORT_UCP */
                   2872:           else
                   2873:             {
                   2874:             CHECK_PARTIAL();
                   2875:             MRRETURN(MATCH_NOMATCH);
                   2876:             }
                   2877:           }
                   2878: 
                   2879:         if (min == max) continue;
                   2880: 
                   2881:         if (minimize)
                   2882:           {
                   2883:           for (fi = min;; fi++)
                   2884:             {
                   2885:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
                   2886:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2887:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2888:             if (eptr <= md->end_subject - length &&
                   2889:               memcmp(eptr, charptr, length) == 0) eptr += length;
                   2890: #ifdef SUPPORT_UCP
                   2891:             else if (oclength > 0 &&
                   2892:                      eptr <= md->end_subject - oclength &&
                   2893:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   2894: #endif  /* SUPPORT_UCP */
                   2895:             else
                   2896:               {
                   2897:               CHECK_PARTIAL();
                   2898:               MRRETURN(MATCH_NOMATCH);
                   2899:               }
                   2900:             }
                   2901:           /* Control never gets here */
                   2902:           }
                   2903: 
                   2904:         else  /* Maximize */
                   2905:           {
                   2906:           pp = eptr;
                   2907:           for (i = min; i < max; i++)
                   2908:             {
                   2909:             if (eptr <= md->end_subject - length &&
                   2910:                 memcmp(eptr, charptr, length) == 0) eptr += length;
                   2911: #ifdef SUPPORT_UCP
                   2912:             else if (oclength > 0 &&
                   2913:                      eptr <= md->end_subject - oclength &&
                   2914:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   2915: #endif  /* SUPPORT_UCP */
                   2916:             else
                   2917:               {
                   2918:               CHECK_PARTIAL();
                   2919:               break;
                   2920:               }
                   2921:             }
                   2922: 
                   2923:           if (possessive) continue;
                   2924: 
                   2925:           for(;;)
                   2926:             {
                   2927:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
                   2928:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2929:             if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
                   2930: #ifdef SUPPORT_UCP
                   2931:             eptr--;
                   2932:             BACKCHAR(eptr);
                   2933: #else   /* without SUPPORT_UCP */
                   2934:             eptr -= length;
                   2935: #endif  /* SUPPORT_UCP */
                   2936:             }
                   2937:           }
                   2938:         /* Control never gets here */
                   2939:         }
                   2940: 
                   2941:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   2942:       obey the code as for non-UTF-8 characters below, though in this case the
                   2943:       value of fc will always be < 128. */
                   2944:       }
                   2945:     else
                   2946: #endif  /* SUPPORT_UTF8 */
                   2947: 
                   2948:     /* When not in UTF-8 mode, load a single-byte character. */
                   2949: 
                   2950:     fc = *ecode++;
                   2951: 
                   2952:     /* The value of fc at this point is always less than 256, though we may or
                   2953:     may not be in UTF-8 mode. The code is duplicated for the caseless and
                   2954:     caseful cases, for speed, since matching characters is likely to be quite
                   2955:     common. First, ensure the minimum number of matches are present. If min =
                   2956:     max, continue at the same level without recursing. Otherwise, if
                   2957:     minimizing, keep trying the rest of the expression and advancing one
                   2958:     matching character if failing, up to the maximum. Alternatively, if
                   2959:     maximizing, find the maximum number of characters and work backwards. */
                   2960: 
                   2961:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2962:       max, eptr));
                   2963: 
                   2964:     if ((ims & PCRE_CASELESS) != 0)
                   2965:       {
                   2966:       fc = md->lcc[fc];
                   2967:       for (i = 1; i <= min; i++)
                   2968:         {
                   2969:         if (eptr >= md->end_subject)
                   2970:           {
                   2971:           SCHECK_PARTIAL();
                   2972:           MRRETURN(MATCH_NOMATCH);
                   2973:           }
                   2974:         if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   2975:         }
                   2976:       if (min == max) continue;
                   2977:       if (minimize)
                   2978:         {
                   2979:         for (fi = min;; fi++)
                   2980:           {
                   2981:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
                   2982:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2983:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2984:           if (eptr >= md->end_subject)
                   2985:             {
                   2986:             SCHECK_PARTIAL();
                   2987:             MRRETURN(MATCH_NOMATCH);
                   2988:             }
                   2989:           if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   2990:           }
                   2991:         /* Control never gets here */
                   2992:         }
                   2993:       else  /* Maximize */
                   2994:         {
                   2995:         pp = eptr;
                   2996:         for (i = min; i < max; i++)
                   2997:           {
                   2998:           if (eptr >= md->end_subject)
                   2999:             {
                   3000:             SCHECK_PARTIAL();
                   3001:             break;
                   3002:             }
                   3003:           if (fc != md->lcc[*eptr]) break;
                   3004:           eptr++;
                   3005:           }
                   3006: 
                   3007:         if (possessive) continue;
                   3008: 
                   3009:         while (eptr >= pp)
                   3010:           {
                   3011:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
                   3012:           eptr--;
                   3013:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3014:           }
                   3015:         MRRETURN(MATCH_NOMATCH);
                   3016:         }
                   3017:       /* Control never gets here */
                   3018:       }
                   3019: 
                   3020:     /* Caseful comparisons (single-byte only; multibyte was handled above) */
                   3021: 
                   3022:     else
                   3023:       {
                   3024:       for (i = 1; i <= min; i++)
                   3025:         {
                   3026:         if (eptr >= md->end_subject)
                   3027:           {
                   3028:           SCHECK_PARTIAL();
                   3029:           MRRETURN(MATCH_NOMATCH);
                   3030:           }
                   3031:         if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
                   3032:         }
                   3033: 
                   3034:       if (min == max) continue;
                   3035: 
                   3036:       if (minimize)
                   3037:         {
                   3038:         for (fi = min;; fi++)
                   3039:           {
                   3040:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
                   3041:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3042:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3043:           if (eptr >= md->end_subject)
                   3044:             {
                   3045:             SCHECK_PARTIAL();
                   3046:             MRRETURN(MATCH_NOMATCH);
                   3047:             }
                   3048:           if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
                   3049:           }
                   3050:         /* Control never gets here */
                   3051:         }
                   3052:       else  /* Maximize */
                   3053:         {
                   3054:         pp = eptr;
                   3055:         for (i = min; i < max; i++)
                   3056:           {
                   3057:           if (eptr >= md->end_subject)
                   3058:             {
                   3059:             SCHECK_PARTIAL();
                   3060:             break;
                   3061:             }
                   3062:           if (fc != *eptr) break;
                   3063:           eptr++;
                   3064:           }
                   3065:         if (possessive) continue;
                   3066: 
                   3067:         while (eptr >= pp)
                   3068:           {
                   3069:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
                   3070:           eptr--;
                   3071:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3072:           }
                   3073:         MRRETURN(MATCH_NOMATCH);
                   3074:         }
                   3075:       }
                   3076:     /* Control never gets here */
                   3077: 
                   3078:     /* Match a negated single one-byte character. The subject character
                   3079:     being checked against it can be multibyte. */
                   3080: 
                   3081:     case OP_NOT:
                   3082:     if (eptr >= md->end_subject)
                   3083:       {
                   3084:       SCHECK_PARTIAL();
                   3085:       MRRETURN(MATCH_NOMATCH);
                   3086:       }
                   3087:     ecode++;
                   3088:     GETCHARINCTEST(c, eptr);
                   3089:     if ((ims & PCRE_CASELESS) != 0)
                   3090:       {
                   3091: #ifdef SUPPORT_UTF8
                   3092:       if (c < 256)
                   3093: #endif
                   3094:       c = md->lcc[c];
                   3095:       if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
                   3096:       }
                   3097:     else
                   3098:       {
                   3099:       if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
                   3100:       }
                   3101:     break;
                   3102: 
                   3103:     /* Match a negated single one-byte character repeatedly. This is almost a
                   3104:     repeat of the code for a repeated single character, but I haven't found a
                   3105:     nice way of commoning these up that doesn't require a test of the
                   3106:     positive/negative option for each character match. Maybe that wouldn't add
                   3107:     very much to the time taken, but character matching *is* what this is all
                   3108:     about... */
                   3109: 
                   3110:     case OP_NOTEXACT:
                   3111:     min = max = GET2(ecode, 1);
                   3112:     ecode += 3;
                   3113:     goto REPEATNOTCHAR;
                   3114: 
                   3115:     case OP_NOTUPTO:
                   3116:     case OP_NOTMINUPTO:
                   3117:     min = 0;
                   3118:     max = GET2(ecode, 1);
                   3119:     minimize = *ecode == OP_NOTMINUPTO;
                   3120:     ecode += 3;
                   3121:     goto REPEATNOTCHAR;
                   3122: 
                   3123:     case OP_NOTPOSSTAR:
                   3124:     possessive = TRUE;
                   3125:     min = 0;
                   3126:     max = INT_MAX;
                   3127:     ecode++;
                   3128:     goto REPEATNOTCHAR;
                   3129: 
                   3130:     case OP_NOTPOSPLUS:
                   3131:     possessive = TRUE;
                   3132:     min = 1;
                   3133:     max = INT_MAX;
                   3134:     ecode++;
                   3135:     goto REPEATNOTCHAR;
                   3136: 
                   3137:     case OP_NOTPOSQUERY:
                   3138:     possessive = TRUE;
                   3139:     min = 0;
                   3140:     max = 1;
                   3141:     ecode++;
                   3142:     goto REPEATNOTCHAR;
                   3143: 
                   3144:     case OP_NOTPOSUPTO:
                   3145:     possessive = TRUE;
                   3146:     min = 0;
                   3147:     max = GET2(ecode, 1);
                   3148:     ecode += 3;
                   3149:     goto REPEATNOTCHAR;
                   3150: 
                   3151:     case OP_NOTSTAR:
                   3152:     case OP_NOTMINSTAR:
                   3153:     case OP_NOTPLUS:
                   3154:     case OP_NOTMINPLUS:
                   3155:     case OP_NOTQUERY:
                   3156:     case OP_NOTMINQUERY:
                   3157:     c = *ecode++ - OP_NOTSTAR;
                   3158:     minimize = (c & 1) != 0;
                   3159:     min = rep_min[c];                 /* Pick up values from tables; */
                   3160:     max = rep_max[c];                 /* zero for max => infinity */
                   3161:     if (max == 0) max = INT_MAX;
                   3162: 
                   3163:     /* Common code for all repeated negated single-byte matches. */
                   3164: 
                   3165:     REPEATNOTCHAR:
                   3166:     fc = *ecode++;
                   3167: 
                   3168:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3169:     since matching characters is likely to be quite common. First, ensure the
                   3170:     minimum number of matches are present. If min = max, continue at the same
                   3171:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3172:     the expression and advancing one matching character if failing, up to the
                   3173:     maximum. Alternatively, if maximizing, find the maximum number of
                   3174:     characters and work backwards. */
                   3175: 
                   3176:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3177:       max, eptr));
                   3178: 
                   3179:     if ((ims & PCRE_CASELESS) != 0)
                   3180:       {
                   3181:       fc = md->lcc[fc];
                   3182: 
                   3183: #ifdef SUPPORT_UTF8
                   3184:       /* UTF-8 mode */
                   3185:       if (utf8)
                   3186:         {
                   3187:         register unsigned int d;
                   3188:         for (i = 1; i <= min; i++)
                   3189:           {
                   3190:           if (eptr >= md->end_subject)
                   3191:             {
                   3192:             SCHECK_PARTIAL();
                   3193:             MRRETURN(MATCH_NOMATCH);
                   3194:             }
                   3195:           GETCHARINC(d, eptr);
                   3196:           if (d < 256) d = md->lcc[d];
                   3197:           if (fc == d) MRRETURN(MATCH_NOMATCH);
                   3198:           }
                   3199:         }
                   3200:       else
                   3201: #endif
                   3202: 
                   3203:       /* Not UTF-8 mode */
                   3204:         {
                   3205:         for (i = 1; i <= min; i++)
                   3206:           {
                   3207:           if (eptr >= md->end_subject)
                   3208:             {
                   3209:             SCHECK_PARTIAL();
                   3210:             MRRETURN(MATCH_NOMATCH);
                   3211:             }
                   3212:           if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   3213:           }
                   3214:         }
                   3215: 
                   3216:       if (min == max) continue;
                   3217: 
                   3218:       if (minimize)
                   3219:         {
                   3220: #ifdef SUPPORT_UTF8
                   3221:         /* UTF-8 mode */
                   3222:         if (utf8)
                   3223:           {
                   3224:           register unsigned int d;
                   3225:           for (fi = min;; fi++)
                   3226:             {
                   3227:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
                   3228:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3229:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3230:             if (eptr >= md->end_subject)
                   3231:               {
                   3232:               SCHECK_PARTIAL();
                   3233:               MRRETURN(MATCH_NOMATCH);
                   3234:               }
                   3235:             GETCHARINC(d, eptr);
                   3236:             if (d < 256) d = md->lcc[d];
                   3237:             if (fc == d) MRRETURN(MATCH_NOMATCH);
                   3238:             }
                   3239:           }
                   3240:         else
                   3241: #endif
                   3242:         /* Not UTF-8 mode */
                   3243:           {
                   3244:           for (fi = min;; fi++)
                   3245:             {
                   3246:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
                   3247:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3248:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3249:             if (eptr >= md->end_subject)
                   3250:               {
                   3251:               SCHECK_PARTIAL();
                   3252:               MRRETURN(MATCH_NOMATCH);
                   3253:               }
                   3254:             if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   3255:             }
                   3256:           }
                   3257:         /* Control never gets here */
                   3258:         }
                   3259: 
                   3260:       /* Maximize case */
                   3261: 
                   3262:       else
                   3263:         {
                   3264:         pp = eptr;
                   3265: 
                   3266: #ifdef SUPPORT_UTF8
                   3267:         /* UTF-8 mode */
                   3268:         if (utf8)
                   3269:           {
                   3270:           register unsigned int d;
                   3271:           for (i = min; i < max; i++)
                   3272:             {
                   3273:             int len = 1;
                   3274:             if (eptr >= md->end_subject)
                   3275:               {
                   3276:               SCHECK_PARTIAL();
                   3277:               break;
                   3278:               }
                   3279:             GETCHARLEN(d, eptr, len);
                   3280:             if (d < 256) d = md->lcc[d];
                   3281:             if (fc == d) break;
                   3282:             eptr += len;
                   3283:             }
                   3284:         if (possessive) continue;
                   3285:         for(;;)
                   3286:             {
                   3287:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
                   3288:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3289:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3290:             BACKCHAR(eptr);
                   3291:             }
                   3292:           }
                   3293:         else
                   3294: #endif
                   3295:         /* Not UTF-8 mode */
                   3296:           {
                   3297:           for (i = min; i < max; i++)
                   3298:             {
                   3299:             if (eptr >= md->end_subject)
                   3300:               {
                   3301:               SCHECK_PARTIAL();
                   3302:               break;
                   3303:               }
                   3304:             if (fc == md->lcc[*eptr]) break;
                   3305:             eptr++;
                   3306:             }
                   3307:           if (possessive) continue;
                   3308:           while (eptr >= pp)
                   3309:             {
                   3310:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
                   3311:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3312:             eptr--;
                   3313:             }
                   3314:           }
                   3315: 
                   3316:         MRRETURN(MATCH_NOMATCH);
                   3317:         }
                   3318:       /* Control never gets here */
                   3319:       }
                   3320: 
                   3321:     /* Caseful comparisons */
                   3322: 
                   3323:     else
                   3324:       {
                   3325: #ifdef SUPPORT_UTF8
                   3326:       /* UTF-8 mode */
                   3327:       if (utf8)
                   3328:         {
                   3329:         register unsigned int d;
                   3330:         for (i = 1; i <= min; i++)
                   3331:           {
                   3332:           if (eptr >= md->end_subject)
                   3333:             {
                   3334:             SCHECK_PARTIAL();
                   3335:             MRRETURN(MATCH_NOMATCH);
                   3336:             }
                   3337:           GETCHARINC(d, eptr);
                   3338:           if (fc == d) MRRETURN(MATCH_NOMATCH);
                   3339:           }
                   3340:         }
                   3341:       else
                   3342: #endif
                   3343:       /* Not UTF-8 mode */
                   3344:         {
                   3345:         for (i = 1; i <= min; i++)
                   3346:           {
                   3347:           if (eptr >= md->end_subject)
                   3348:             {
                   3349:             SCHECK_PARTIAL();
                   3350:             MRRETURN(MATCH_NOMATCH);
                   3351:             }
                   3352:           if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
                   3353:           }
                   3354:         }
                   3355: 
                   3356:       if (min == max) continue;
                   3357: 
                   3358:       if (minimize)
                   3359:         {
                   3360: #ifdef SUPPORT_UTF8
                   3361:         /* UTF-8 mode */
                   3362:         if (utf8)
                   3363:           {
                   3364:           register unsigned int d;
                   3365:           for (fi = min;; fi++)
                   3366:             {
                   3367:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
                   3368:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3369:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3370:             if (eptr >= md->end_subject)
                   3371:               {
                   3372:               SCHECK_PARTIAL();
                   3373:               MRRETURN(MATCH_NOMATCH);
                   3374:               }
                   3375:             GETCHARINC(d, eptr);
                   3376:             if (fc == d) MRRETURN(MATCH_NOMATCH);
                   3377:             }
                   3378:           }
                   3379:         else
                   3380: #endif
                   3381:         /* Not UTF-8 mode */
                   3382:           {
                   3383:           for (fi = min;; fi++)
                   3384:             {
                   3385:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
                   3386:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3387:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3388:             if (eptr >= md->end_subject)
                   3389:               {
                   3390:               SCHECK_PARTIAL();
                   3391:               MRRETURN(MATCH_NOMATCH);
                   3392:               }
                   3393:             if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
                   3394:             }
                   3395:           }
                   3396:         /* Control never gets here */
                   3397:         }
                   3398: 
                   3399:       /* Maximize case */
                   3400: 
                   3401:       else
                   3402:         {
                   3403:         pp = eptr;
                   3404: 
                   3405: #ifdef SUPPORT_UTF8
                   3406:         /* UTF-8 mode */
                   3407:         if (utf8)
                   3408:           {
                   3409:           register unsigned int d;
                   3410:           for (i = min; i < max; i++)
                   3411:             {
                   3412:             int len = 1;
                   3413:             if (eptr >= md->end_subject)
                   3414:               {
                   3415:               SCHECK_PARTIAL();
                   3416:               break;
                   3417:               }
                   3418:             GETCHARLEN(d, eptr, len);
                   3419:             if (fc == d) break;
                   3420:             eptr += len;
                   3421:             }
                   3422:           if (possessive) continue;
                   3423:           for(;;)
                   3424:             {
                   3425:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
                   3426:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3427:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3428:             BACKCHAR(eptr);
                   3429:             }
                   3430:           }
                   3431:         else
                   3432: #endif
                   3433:         /* Not UTF-8 mode */
                   3434:           {
                   3435:           for (i = min; i < max; i++)
                   3436:             {
                   3437:             if (eptr >= md->end_subject)
                   3438:               {
                   3439:               SCHECK_PARTIAL();
                   3440:               break;
                   3441:               }
                   3442:             if (fc == *eptr) break;
                   3443:             eptr++;
                   3444:             }
                   3445:           if (possessive) continue;
                   3446:           while (eptr >= pp)
                   3447:             {
                   3448:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
                   3449:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3450:             eptr--;
                   3451:             }
                   3452:           }
                   3453: 
                   3454:         MRRETURN(MATCH_NOMATCH);
                   3455:         }
                   3456:       }
                   3457:     /* Control never gets here */
                   3458: 
                   3459:     /* Match a single character type repeatedly; several different opcodes
                   3460:     share code. This is very similar to the code for single characters, but we
                   3461:     repeat it in the interests of efficiency. */
                   3462: 
                   3463:     case OP_TYPEEXACT:
                   3464:     min = max = GET2(ecode, 1);
                   3465:     minimize = TRUE;
                   3466:     ecode += 3;
                   3467:     goto REPEATTYPE;
                   3468: 
                   3469:     case OP_TYPEUPTO:
                   3470:     case OP_TYPEMINUPTO:
                   3471:     min = 0;
                   3472:     max = GET2(ecode, 1);
                   3473:     minimize = *ecode == OP_TYPEMINUPTO;
                   3474:     ecode += 3;
                   3475:     goto REPEATTYPE;
                   3476: 
                   3477:     case OP_TYPEPOSSTAR:
                   3478:     possessive = TRUE;
                   3479:     min = 0;
                   3480:     max = INT_MAX;
                   3481:     ecode++;
                   3482:     goto REPEATTYPE;
                   3483: 
                   3484:     case OP_TYPEPOSPLUS:
                   3485:     possessive = TRUE;
                   3486:     min = 1;
                   3487:     max = INT_MAX;
                   3488:     ecode++;
                   3489:     goto REPEATTYPE;
                   3490: 
                   3491:     case OP_TYPEPOSQUERY:
                   3492:     possessive = TRUE;
                   3493:     min = 0;
                   3494:     max = 1;
                   3495:     ecode++;
                   3496:     goto REPEATTYPE;
                   3497: 
                   3498:     case OP_TYPEPOSUPTO:
                   3499:     possessive = TRUE;
                   3500:     min = 0;
                   3501:     max = GET2(ecode, 1);
                   3502:     ecode += 3;
                   3503:     goto REPEATTYPE;
                   3504: 
                   3505:     case OP_TYPESTAR:
                   3506:     case OP_TYPEMINSTAR:
                   3507:     case OP_TYPEPLUS:
                   3508:     case OP_TYPEMINPLUS:
                   3509:     case OP_TYPEQUERY:
                   3510:     case OP_TYPEMINQUERY:
                   3511:     c = *ecode++ - OP_TYPESTAR;
                   3512:     minimize = (c & 1) != 0;
                   3513:     min = rep_min[c];                 /* Pick up values from tables; */
                   3514:     max = rep_max[c];                 /* zero for max => infinity */
                   3515:     if (max == 0) max = INT_MAX;
                   3516: 
                   3517:     /* Common code for all repeated single character type matches. Note that
                   3518:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   3519:     character types, the valid characters are all one-byte long. */
                   3520: 
                   3521:     REPEATTYPE:
                   3522:     ctype = *ecode++;      /* Code for the character type */
                   3523: 
                   3524: #ifdef SUPPORT_UCP
                   3525:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   3526:       {
                   3527:       prop_fail_result = ctype == OP_NOTPROP;
                   3528:       prop_type = *ecode++;
                   3529:       prop_value = *ecode++;
                   3530:       }
                   3531:     else prop_type = -1;
                   3532: #endif
                   3533: 
                   3534:     /* First, ensure the minimum number of matches are present. Use inline
                   3535:     code for maximizing the speed, and do the type test once at the start
                   3536:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
                   3537:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   3538:     and single-bytes. */
                   3539: 
                   3540:     if (min > 0)
                   3541:       {
                   3542: #ifdef SUPPORT_UCP
                   3543:       if (prop_type >= 0)
                   3544:         {
                   3545:         switch(prop_type)
                   3546:           {
                   3547:           case PT_ANY:
                   3548:           if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
                   3549:           for (i = 1; i <= min; i++)
                   3550:             {
                   3551:             if (eptr >= md->end_subject)
                   3552:               {
                   3553:               SCHECK_PARTIAL();
                   3554:               MRRETURN(MATCH_NOMATCH);
                   3555:               }
                   3556:             GETCHARINCTEST(c, eptr);
                   3557:             }
                   3558:           break;
                   3559: 
                   3560:           case PT_LAMP:
                   3561:           for (i = 1; i <= min; i++)
                   3562:             {
                   3563:             if (eptr >= md->end_subject)
                   3564:               {
                   3565:               SCHECK_PARTIAL();
                   3566:               MRRETURN(MATCH_NOMATCH);
                   3567:               }
                   3568:             GETCHARINCTEST(c, eptr);
                   3569:             prop_chartype = UCD_CHARTYPE(c);
                   3570:             if ((prop_chartype == ucp_Lu ||
                   3571:                  prop_chartype == ucp_Ll ||
                   3572:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   3573:               MRRETURN(MATCH_NOMATCH);
                   3574:             }
                   3575:           break;
                   3576: 
                   3577:           case PT_GC:
                   3578:           for (i = 1; i <= min; i++)
                   3579:             {
                   3580:             if (eptr >= md->end_subject)
                   3581:               {
                   3582:               SCHECK_PARTIAL();
                   3583:               MRRETURN(MATCH_NOMATCH);
                   3584:               }
                   3585:             GETCHARINCTEST(c, eptr);
                   3586:             prop_category = UCD_CATEGORY(c);
                   3587:             if ((prop_category == prop_value) == prop_fail_result)
                   3588:               MRRETURN(MATCH_NOMATCH);
                   3589:             }
                   3590:           break;
                   3591: 
                   3592:           case PT_PC:
                   3593:           for (i = 1; i <= min; i++)
                   3594:             {
                   3595:             if (eptr >= md->end_subject)
                   3596:               {
                   3597:               SCHECK_PARTIAL();
                   3598:               MRRETURN(MATCH_NOMATCH);
                   3599:               }
                   3600:             GETCHARINCTEST(c, eptr);
                   3601:             prop_chartype = UCD_CHARTYPE(c);
                   3602:             if ((prop_chartype == prop_value) == prop_fail_result)
                   3603:               MRRETURN(MATCH_NOMATCH);
                   3604:             }
                   3605:           break;
                   3606: 
                   3607:           case PT_SC:
                   3608:           for (i = 1; i <= min; i++)
                   3609:             {
                   3610:             if (eptr >= md->end_subject)
                   3611:               {
                   3612:               SCHECK_PARTIAL();
                   3613:               MRRETURN(MATCH_NOMATCH);
                   3614:               }
                   3615:             GETCHARINCTEST(c, eptr);
                   3616:             prop_script = UCD_SCRIPT(c);
                   3617:             if ((prop_script == prop_value) == prop_fail_result)
                   3618:               MRRETURN(MATCH_NOMATCH);
                   3619:             }
                   3620:           break;
                   3621: 
                   3622:           case PT_ALNUM:
                   3623:           for (i = 1; i <= min; i++)
                   3624:             {
                   3625:             if (eptr >= md->end_subject)
                   3626:               {
                   3627:               SCHECK_PARTIAL();
                   3628:               MRRETURN(MATCH_NOMATCH);
                   3629:               }
                   3630:             GETCHARINCTEST(c, eptr);
                   3631:             prop_category = UCD_CATEGORY(c);
                   3632:             if ((prop_category == ucp_L || prop_category == ucp_N)
                   3633:                    == prop_fail_result)
                   3634:               MRRETURN(MATCH_NOMATCH);
                   3635:             }
                   3636:           break;
                   3637: 
                   3638:           case PT_SPACE:    /* Perl space */
                   3639:           for (i = 1; i <= min; i++)
                   3640:             {
                   3641:             if (eptr >= md->end_subject)
                   3642:               {
                   3643:               SCHECK_PARTIAL();
                   3644:               MRRETURN(MATCH_NOMATCH);
                   3645:               }
                   3646:             GETCHARINCTEST(c, eptr);
                   3647:             prop_category = UCD_CATEGORY(c);
                   3648:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   3649:                  c == CHAR_FF || c == CHAR_CR)
                   3650:                    == prop_fail_result)
                   3651:               MRRETURN(MATCH_NOMATCH);
                   3652:             }
                   3653:           break;
                   3654: 
                   3655:           case PT_PXSPACE:  /* POSIX space */
                   3656:           for (i = 1; i <= min; i++)
                   3657:             {
                   3658:             if (eptr >= md->end_subject)
                   3659:               {
                   3660:               SCHECK_PARTIAL();
                   3661:               MRRETURN(MATCH_NOMATCH);
                   3662:               }
                   3663:             GETCHARINCTEST(c, eptr);
                   3664:             prop_category = UCD_CATEGORY(c);
                   3665:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   3666:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   3667:                    == prop_fail_result)
                   3668:               MRRETURN(MATCH_NOMATCH);
                   3669:             }
                   3670:           break;
                   3671: 
                   3672:           case PT_WORD:
                   3673:           for (i = 1; i <= min; i++)
                   3674:             {
                   3675:             if (eptr >= md->end_subject)
                   3676:               {
                   3677:               SCHECK_PARTIAL();
                   3678:               MRRETURN(MATCH_NOMATCH);
                   3679:               }
                   3680:             GETCHARINCTEST(c, eptr);
                   3681:             prop_category = UCD_CATEGORY(c);
                   3682:             if ((prop_category == ucp_L || prop_category == ucp_N ||
                   3683:                  c == CHAR_UNDERSCORE)
                   3684:                    == prop_fail_result)
                   3685:               MRRETURN(MATCH_NOMATCH);
                   3686:             }
                   3687:           break;
                   3688: 
                   3689:           /* This should not occur */
                   3690: 
                   3691:           default:
                   3692:           RRETURN(PCRE_ERROR_INTERNAL);
                   3693:           }
                   3694:         }
                   3695: 
                   3696:       /* Match extended Unicode sequences. We will get here only if the
                   3697:       support is in the binary; otherwise a compile-time error occurs. */
                   3698: 
                   3699:       else if (ctype == OP_EXTUNI)
                   3700:         {
                   3701:         for (i = 1; i <= min; i++)
                   3702:           {
                   3703:           if (eptr >= md->end_subject)
                   3704:             {
                   3705:             SCHECK_PARTIAL();
                   3706:             MRRETURN(MATCH_NOMATCH);
                   3707:             }
                   3708:           GETCHARINCTEST(c, eptr);
                   3709:           prop_category = UCD_CATEGORY(c);
                   3710:           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
                   3711:           while (eptr < md->end_subject)
                   3712:             {
                   3713:             int len = 1;
                   3714:             if (!utf8) c = *eptr;
                   3715:               else { GETCHARLEN(c, eptr, len); }
                   3716:             prop_category = UCD_CATEGORY(c);
                   3717:             if (prop_category != ucp_M) break;
                   3718:             eptr += len;
                   3719:             }
                   3720:           }
                   3721:         }
                   3722: 
                   3723:       else
                   3724: #endif     /* SUPPORT_UCP */
                   3725: 
                   3726: /* Handle all other cases when the coding is UTF-8 */
                   3727: 
                   3728: #ifdef SUPPORT_UTF8
                   3729:       if (utf8) switch(ctype)
                   3730:         {
                   3731:         case OP_ANY:
                   3732:         for (i = 1; i <= min; i++)
                   3733:           {
                   3734:           if (eptr >= md->end_subject)
                   3735:             {
                   3736:             SCHECK_PARTIAL();
                   3737:             MRRETURN(MATCH_NOMATCH);
                   3738:             }
                   3739:           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
                   3740:           eptr++;
                   3741:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3742:           }
                   3743:         break;
                   3744: 
                   3745:         case OP_ALLANY:
                   3746:         for (i = 1; i <= min; i++)
                   3747:           {
                   3748:           if (eptr >= md->end_subject)
                   3749:             {
                   3750:             SCHECK_PARTIAL();
                   3751:             MRRETURN(MATCH_NOMATCH);
                   3752:             }
                   3753:           eptr++;
                   3754:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3755:           }
                   3756:         break;
                   3757: 
                   3758:         case OP_ANYBYTE:
                   3759:         if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
                   3760:         eptr += min;
                   3761:         break;
                   3762: 
                   3763:         case OP_ANYNL:
                   3764:         for (i = 1; i <= min; i++)
                   3765:           {
                   3766:           if (eptr >= md->end_subject)
                   3767:             {
                   3768:             SCHECK_PARTIAL();
                   3769:             MRRETURN(MATCH_NOMATCH);
                   3770:             }
                   3771:           GETCHARINC(c, eptr);
                   3772:           switch(c)
                   3773:             {
                   3774:             default: MRRETURN(MATCH_NOMATCH);
                   3775:             case 0x000d:
                   3776:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3777:             break;
                   3778: 
                   3779:             case 0x000a:
                   3780:             break;
                   3781: 
                   3782:             case 0x000b:
                   3783:             case 0x000c:
                   3784:             case 0x0085:
                   3785:             case 0x2028:
                   3786:             case 0x2029:
                   3787:             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
                   3788:             break;
                   3789:             }
                   3790:           }
                   3791:         break;
                   3792: 
                   3793:         case OP_NOT_HSPACE:
                   3794:         for (i = 1; i <= min; i++)
                   3795:           {
                   3796:           if (eptr >= md->end_subject)
                   3797:             {
                   3798:             SCHECK_PARTIAL();
                   3799:             MRRETURN(MATCH_NOMATCH);
                   3800:             }
                   3801:           GETCHARINC(c, eptr);
                   3802:           switch(c)
                   3803:             {
                   3804:             default: break;
                   3805:             case 0x09:      /* HT */
                   3806:             case 0x20:      /* SPACE */
                   3807:             case 0xa0:      /* NBSP */
                   3808:             case 0x1680:    /* OGHAM SPACE MARK */
                   3809:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3810:             case 0x2000:    /* EN QUAD */
                   3811:             case 0x2001:    /* EM QUAD */
                   3812:             case 0x2002:    /* EN SPACE */
                   3813:             case 0x2003:    /* EM SPACE */
                   3814:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3815:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3816:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3817:             case 0x2007:    /* FIGURE SPACE */
                   3818:             case 0x2008:    /* PUNCTUATION SPACE */
                   3819:             case 0x2009:    /* THIN SPACE */
                   3820:             case 0x200A:    /* HAIR SPACE */
                   3821:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3822:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3823:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3824:             MRRETURN(MATCH_NOMATCH);
                   3825:             }
                   3826:           }
                   3827:         break;
                   3828: 
                   3829:         case OP_HSPACE:
                   3830:         for (i = 1; i <= min; i++)
                   3831:           {
                   3832:           if (eptr >= md->end_subject)
                   3833:             {
                   3834:             SCHECK_PARTIAL();
                   3835:             MRRETURN(MATCH_NOMATCH);
                   3836:             }
                   3837:           GETCHARINC(c, eptr);
                   3838:           switch(c)
                   3839:             {
                   3840:             default: MRRETURN(MATCH_NOMATCH);
                   3841:             case 0x09:      /* HT */
                   3842:             case 0x20:      /* SPACE */
                   3843:             case 0xa0:      /* NBSP */
                   3844:             case 0x1680:    /* OGHAM SPACE MARK */
                   3845:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3846:             case 0x2000:    /* EN QUAD */
                   3847:             case 0x2001:    /* EM QUAD */
                   3848:             case 0x2002:    /* EN SPACE */
                   3849:             case 0x2003:    /* EM SPACE */
                   3850:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3851:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3852:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3853:             case 0x2007:    /* FIGURE SPACE */
                   3854:             case 0x2008:    /* PUNCTUATION SPACE */
                   3855:             case 0x2009:    /* THIN SPACE */
                   3856:             case 0x200A:    /* HAIR SPACE */
                   3857:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3858:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3859:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3860:             break;
                   3861:             }
                   3862:           }
                   3863:         break;
                   3864: 
                   3865:         case OP_NOT_VSPACE:
                   3866:         for (i = 1; i <= min; i++)
                   3867:           {
                   3868:           if (eptr >= md->end_subject)
                   3869:             {
                   3870:             SCHECK_PARTIAL();
                   3871:             MRRETURN(MATCH_NOMATCH);
                   3872:             }
                   3873:           GETCHARINC(c, eptr);
                   3874:           switch(c)
                   3875:             {
                   3876:             default: break;
                   3877:             case 0x0a:      /* LF */
                   3878:             case 0x0b:      /* VT */
                   3879:             case 0x0c:      /* FF */
                   3880:             case 0x0d:      /* CR */
                   3881:             case 0x85:      /* NEL */
                   3882:             case 0x2028:    /* LINE SEPARATOR */
                   3883:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3884:             MRRETURN(MATCH_NOMATCH);
                   3885:             }
                   3886:           }
                   3887:         break;
                   3888: 
                   3889:         case OP_VSPACE:
                   3890:         for (i = 1; i <= min; i++)
                   3891:           {
                   3892:           if (eptr >= md->end_subject)
                   3893:             {
                   3894:             SCHECK_PARTIAL();
                   3895:             MRRETURN(MATCH_NOMATCH);
                   3896:             }
                   3897:           GETCHARINC(c, eptr);
                   3898:           switch(c)
                   3899:             {
                   3900:             default: MRRETURN(MATCH_NOMATCH);
                   3901:             case 0x0a:      /* LF */
                   3902:             case 0x0b:      /* VT */
                   3903:             case 0x0c:      /* FF */
                   3904:             case 0x0d:      /* CR */
                   3905:             case 0x85:      /* NEL */
                   3906:             case 0x2028:    /* LINE SEPARATOR */
                   3907:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3908:             break;
                   3909:             }
                   3910:           }
                   3911:         break;
                   3912: 
                   3913:         case OP_NOT_DIGIT:
                   3914:         for (i = 1; i <= min; i++)
                   3915:           {
                   3916:           if (eptr >= md->end_subject)
                   3917:             {
                   3918:             SCHECK_PARTIAL();
                   3919:             MRRETURN(MATCH_NOMATCH);
                   3920:             }
                   3921:           GETCHARINC(c, eptr);
                   3922:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
                   3923:             MRRETURN(MATCH_NOMATCH);
                   3924:           }
                   3925:         break;
                   3926: 
                   3927:         case OP_DIGIT:
                   3928:         for (i = 1; i <= min; i++)
                   3929:           {
                   3930:           if (eptr >= md->end_subject)
                   3931:             {
                   3932:             SCHECK_PARTIAL();
                   3933:             MRRETURN(MATCH_NOMATCH);
                   3934:             }
                   3935:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
                   3936:             MRRETURN(MATCH_NOMATCH);
                   3937:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3938:           }
                   3939:         break;
                   3940: 
                   3941:         case OP_NOT_WHITESPACE:
                   3942:         for (i = 1; i <= min; i++)
                   3943:           {
                   3944:           if (eptr >= md->end_subject)
                   3945:             {
                   3946:             SCHECK_PARTIAL();
                   3947:             MRRETURN(MATCH_NOMATCH);
                   3948:             }
                   3949:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
                   3950:             MRRETURN(MATCH_NOMATCH);
                   3951:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3952:           }
                   3953:         break;
                   3954: 
                   3955:         case OP_WHITESPACE:
                   3956:         for (i = 1; i <= min; i++)
                   3957:           {
                   3958:           if (eptr >= md->end_subject)
                   3959:             {
                   3960:             SCHECK_PARTIAL();
                   3961:             MRRETURN(MATCH_NOMATCH);
                   3962:             }
                   3963:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
                   3964:             MRRETURN(MATCH_NOMATCH);
                   3965:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3966:           }
                   3967:         break;
                   3968: 
                   3969:         case OP_NOT_WORDCHAR:
                   3970:         for (i = 1; i <= min; i++)
                   3971:           {
                   3972:           if (eptr >= md->end_subject)
                   3973:             {
                   3974:             SCHECK_PARTIAL();
                   3975:             MRRETURN(MATCH_NOMATCH);
                   3976:             }
                   3977:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
                   3978:             MRRETURN(MATCH_NOMATCH);
                   3979:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3980:           }
                   3981:         break;
                   3982: 
                   3983:         case OP_WORDCHAR:
                   3984:         for (i = 1; i <= min; i++)
                   3985:           {
                   3986:           if (eptr >= md->end_subject)
                   3987:             {
                   3988:             SCHECK_PARTIAL();
                   3989:             MRRETURN(MATCH_NOMATCH);
                   3990:             }
                   3991:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
                   3992:             MRRETURN(MATCH_NOMATCH);
                   3993:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3994:           }
                   3995:         break;
                   3996: 
                   3997:         default:
                   3998:         RRETURN(PCRE_ERROR_INTERNAL);
                   3999:         }  /* End switch(ctype) */
                   4000: 
                   4001:       else
                   4002: #endif     /* SUPPORT_UTF8 */
                   4003: 
                   4004:       /* Code for the non-UTF-8 case for minimum matching of operators other
                   4005:       than OP_PROP and OP_NOTPROP. */
                   4006: 
                   4007:       switch(ctype)
                   4008:         {
                   4009:         case OP_ANY:
                   4010:         for (i = 1; i <= min; i++)
                   4011:           {
                   4012:           if (eptr >= md->end_subject)
                   4013:             {
                   4014:             SCHECK_PARTIAL();
                   4015:             MRRETURN(MATCH_NOMATCH);
                   4016:             }
                   4017:           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
                   4018:           eptr++;
                   4019:           }
                   4020:         break;
                   4021: 
                   4022:         case OP_ALLANY:
                   4023:         if (eptr > md->end_subject - min)
                   4024:           {
                   4025:           SCHECK_PARTIAL();
                   4026:           MRRETURN(MATCH_NOMATCH);
                   4027:           }
                   4028:         eptr += min;
                   4029:         break;
                   4030: 
                   4031:         case OP_ANYBYTE:
                   4032:         if (eptr > md->end_subject - min)
                   4033:           {
                   4034:           SCHECK_PARTIAL();
                   4035:           MRRETURN(MATCH_NOMATCH);
                   4036:           }
                   4037:         eptr += min;
                   4038:         break;
                   4039: 
                   4040:         case OP_ANYNL:
                   4041:         for (i = 1; i <= min; i++)
                   4042:           {
                   4043:           if (eptr >= md->end_subject)
                   4044:             {
                   4045:             SCHECK_PARTIAL();
                   4046:             MRRETURN(MATCH_NOMATCH);
                   4047:             }
                   4048:           switch(*eptr++)
                   4049:             {
                   4050:             default: MRRETURN(MATCH_NOMATCH);
                   4051:             case 0x000d:
                   4052:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4053:             break;
                   4054:             case 0x000a:
                   4055:             break;
                   4056: 
                   4057:             case 0x000b:
                   4058:             case 0x000c:
                   4059:             case 0x0085:
                   4060:             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
                   4061:             break;
                   4062:             }
                   4063:           }
                   4064:         break;
                   4065: 
                   4066:         case OP_NOT_HSPACE:
                   4067:         for (i = 1; i <= min; i++)
                   4068:           {
                   4069:           if (eptr >= md->end_subject)
                   4070:             {
                   4071:             SCHECK_PARTIAL();
                   4072:             MRRETURN(MATCH_NOMATCH);
                   4073:             }
                   4074:           switch(*eptr++)
                   4075:             {
                   4076:             default: break;
                   4077:             case 0x09:      /* HT */
                   4078:             case 0x20:      /* SPACE */
                   4079:             case 0xa0:      /* NBSP */
                   4080:             MRRETURN(MATCH_NOMATCH);
                   4081:             }
                   4082:           }
                   4083:         break;
                   4084: 
                   4085:         case OP_HSPACE:
                   4086:         for (i = 1; i <= min; i++)
                   4087:           {
                   4088:           if (eptr >= md->end_subject)
                   4089:             {
                   4090:             SCHECK_PARTIAL();
                   4091:             MRRETURN(MATCH_NOMATCH);
                   4092:             }
                   4093:           switch(*eptr++)
                   4094:             {
                   4095:             default: MRRETURN(MATCH_NOMATCH);
                   4096:             case 0x09:      /* HT */
                   4097:             case 0x20:      /* SPACE */
                   4098:             case 0xa0:      /* NBSP */
                   4099:             break;
                   4100:             }
                   4101:           }
                   4102:         break;
                   4103: 
                   4104:         case OP_NOT_VSPACE:
                   4105:         for (i = 1; i <= min; i++)
                   4106:           {
                   4107:           if (eptr >= md->end_subject)
                   4108:             {
                   4109:             SCHECK_PARTIAL();
                   4110:             MRRETURN(MATCH_NOMATCH);
                   4111:             }
                   4112:           switch(*eptr++)
                   4113:             {
                   4114:             default: break;
                   4115:             case 0x0a:      /* LF */
                   4116:             case 0x0b:      /* VT */
                   4117:             case 0x0c:      /* FF */
                   4118:             case 0x0d:      /* CR */
                   4119:             case 0x85:      /* NEL */
                   4120:             MRRETURN(MATCH_NOMATCH);
                   4121:             }
                   4122:           }
                   4123:         break;
                   4124: 
                   4125:         case OP_VSPACE:
                   4126:         for (i = 1; i <= min; i++)
                   4127:           {
                   4128:           if (eptr >= md->end_subject)
                   4129:             {
                   4130:             SCHECK_PARTIAL();
                   4131:             MRRETURN(MATCH_NOMATCH);
                   4132:             }
                   4133:           switch(*eptr++)
                   4134:             {
                   4135:             default: MRRETURN(MATCH_NOMATCH);
                   4136:             case 0x0a:      /* LF */
                   4137:             case 0x0b:      /* VT */
                   4138:             case 0x0c:      /* FF */
                   4139:             case 0x0d:      /* CR */
                   4140:             case 0x85:      /* NEL */
                   4141:             break;
                   4142:             }
                   4143:           }
                   4144:         break;
                   4145: 
                   4146:         case OP_NOT_DIGIT:
                   4147:         for (i = 1; i <= min; i++)
                   4148:           {
                   4149:           if (eptr >= md->end_subject)
                   4150:             {
                   4151:             SCHECK_PARTIAL();
                   4152:             MRRETURN(MATCH_NOMATCH);
                   4153:             }
                   4154:           if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
                   4155:           }
                   4156:         break;
                   4157: 
                   4158:         case OP_DIGIT:
                   4159:         for (i = 1; i <= min; i++)
                   4160:           {
                   4161:           if (eptr >= md->end_subject)
                   4162:             {
                   4163:             SCHECK_PARTIAL();
                   4164:             MRRETURN(MATCH_NOMATCH);
                   4165:             }
                   4166:           if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
                   4167:           }
                   4168:         break;
                   4169: 
                   4170:         case OP_NOT_WHITESPACE:
                   4171:         for (i = 1; i <= min; i++)
                   4172:           {
                   4173:           if (eptr >= md->end_subject)
                   4174:             {
                   4175:             SCHECK_PARTIAL();
                   4176:             MRRETURN(MATCH_NOMATCH);
                   4177:             }
                   4178:           if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
                   4179:           }
                   4180:         break;
                   4181: 
                   4182:         case OP_WHITESPACE:
                   4183:         for (i = 1; i <= min; i++)
                   4184:           {
                   4185:           if (eptr >= md->end_subject)
                   4186:             {
                   4187:             SCHECK_PARTIAL();
                   4188:             MRRETURN(MATCH_NOMATCH);
                   4189:             }
                   4190:           if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
                   4191:           }
                   4192:         break;
                   4193: 
                   4194:         case OP_NOT_WORDCHAR:
                   4195:         for (i = 1; i <= min; i++)
                   4196:           {
                   4197:           if (eptr >= md->end_subject)
                   4198:             {
                   4199:             SCHECK_PARTIAL();
                   4200:             MRRETURN(MATCH_NOMATCH);
                   4201:             }
                   4202:           if ((md->ctypes[*eptr++] & ctype_word) != 0)
                   4203:             MRRETURN(MATCH_NOMATCH);
                   4204:           }
                   4205:         break;
                   4206: 
                   4207:         case OP_WORDCHAR:
                   4208:         for (i = 1; i <= min; i++)
                   4209:           {
                   4210:           if (eptr >= md->end_subject)
                   4211:             {
                   4212:             SCHECK_PARTIAL();
                   4213:             MRRETURN(MATCH_NOMATCH);
                   4214:             }
                   4215:           if ((md->ctypes[*eptr++] & ctype_word) == 0)
                   4216:             MRRETURN(MATCH_NOMATCH);
                   4217:           }
                   4218:         break;
                   4219: 
                   4220:         default:
                   4221:         RRETURN(PCRE_ERROR_INTERNAL);
                   4222:         }
                   4223:       }
                   4224: 
                   4225:     /* If min = max, continue at the same level without recursing */
                   4226: 
                   4227:     if (min == max) continue;
                   4228: 
                   4229:     /* If minimizing, we have to test the rest of the pattern before each
                   4230:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   4231:     separate the UCP cases. */
                   4232: 
                   4233:     if (minimize)
                   4234:       {
                   4235: #ifdef SUPPORT_UCP
                   4236:       if (prop_type >= 0)
                   4237:         {
                   4238:         switch(prop_type)
                   4239:           {
                   4240:           case PT_ANY:
                   4241:           for (fi = min;; fi++)
                   4242:             {
                   4243:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
                   4244:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4245:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4246:             if (eptr >= md->end_subject)
                   4247:               {
                   4248:               SCHECK_PARTIAL();
                   4249:               MRRETURN(MATCH_NOMATCH);
                   4250:               }
                   4251:             GETCHARINCTEST(c, eptr);
                   4252:             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
                   4253:             }
                   4254:           /* Control never gets here */
                   4255: 
                   4256:           case PT_LAMP:
                   4257:           for (fi = min;; fi++)
                   4258:             {
                   4259:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
                   4260:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4261:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4262:             if (eptr >= md->end_subject)
                   4263:               {
                   4264:               SCHECK_PARTIAL();
                   4265:               MRRETURN(MATCH_NOMATCH);
                   4266:               }
                   4267:             GETCHARINCTEST(c, eptr);
                   4268:             prop_chartype = UCD_CHARTYPE(c);
                   4269:             if ((prop_chartype == ucp_Lu ||
                   4270:                  prop_chartype == ucp_Ll ||
                   4271:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   4272:               MRRETURN(MATCH_NOMATCH);
                   4273:             }
                   4274:           /* Control never gets here */
                   4275: 
                   4276:           case PT_GC:
                   4277:           for (fi = min;; fi++)
                   4278:             {
                   4279:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
                   4280:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4281:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4282:             if (eptr >= md->end_subject)
                   4283:               {
                   4284:               SCHECK_PARTIAL();
                   4285:               MRRETURN(MATCH_NOMATCH);
                   4286:               }
                   4287:             GETCHARINCTEST(c, eptr);
                   4288:             prop_category = UCD_CATEGORY(c);
                   4289:             if ((prop_category == prop_value) == prop_fail_result)
                   4290:               MRRETURN(MATCH_NOMATCH);
                   4291:             }
                   4292:           /* Control never gets here */
                   4293: 
                   4294:           case PT_PC:
                   4295:           for (fi = min;; fi++)
                   4296:             {
                   4297:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
                   4298:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4299:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4300:             if (eptr >= md->end_subject)
                   4301:               {
                   4302:               SCHECK_PARTIAL();
                   4303:               MRRETURN(MATCH_NOMATCH);
                   4304:               }
                   4305:             GETCHARINCTEST(c, eptr);
                   4306:             prop_chartype = UCD_CHARTYPE(c);
                   4307:             if ((prop_chartype == prop_value) == prop_fail_result)
                   4308:               MRRETURN(MATCH_NOMATCH);
                   4309:             }
                   4310:           /* Control never gets here */
                   4311: 
                   4312:           case PT_SC:
                   4313:           for (fi = min;; fi++)
                   4314:             {
                   4315:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
                   4316:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4317:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4318:             if (eptr >= md->end_subject)
                   4319:               {
                   4320:               SCHECK_PARTIAL();
                   4321:               MRRETURN(MATCH_NOMATCH);
                   4322:               }
                   4323:             GETCHARINCTEST(c, eptr);
                   4324:             prop_script = UCD_SCRIPT(c);
                   4325:             if ((prop_script == prop_value) == prop_fail_result)
                   4326:               MRRETURN(MATCH_NOMATCH);
                   4327:             }
                   4328:           /* Control never gets here */
                   4329: 
                   4330:           case PT_ALNUM:
                   4331:           for (fi = min;; fi++)
                   4332:             {
                   4333:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
                   4334:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4335:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4336:             if (eptr >= md->end_subject)
                   4337:               {
                   4338:               SCHECK_PARTIAL();
                   4339:               MRRETURN(MATCH_NOMATCH);
                   4340:               }
                   4341:             GETCHARINCTEST(c, eptr);
                   4342:             prop_category = UCD_CATEGORY(c);
                   4343:             if ((prop_category == ucp_L || prop_category == ucp_N)
                   4344:                    == prop_fail_result)
                   4345:               MRRETURN(MATCH_NOMATCH);
                   4346:             }
                   4347:           /* Control never gets here */
                   4348: 
                   4349:           case PT_SPACE:    /* Perl space */
                   4350:           for (fi = min;; fi++)
                   4351:             {
                   4352:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
                   4353:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4354:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4355:             if (eptr >= md->end_subject)
                   4356:               {
                   4357:               SCHECK_PARTIAL();
                   4358:               MRRETURN(MATCH_NOMATCH);
                   4359:               }
                   4360:             GETCHARINCTEST(c, eptr);
                   4361:             prop_category = UCD_CATEGORY(c);
                   4362:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4363:                  c == CHAR_FF || c == CHAR_CR)
                   4364:                    == prop_fail_result)
                   4365:               MRRETURN(MATCH_NOMATCH);
                   4366:             }
                   4367:           /* Control never gets here */
                   4368: 
                   4369:           case PT_PXSPACE:  /* POSIX space */
                   4370:           for (fi = min;; fi++)
                   4371:             {
                   4372:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
                   4373:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4374:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4375:             if (eptr >= md->end_subject)
                   4376:               {
                   4377:               SCHECK_PARTIAL();
                   4378:               MRRETURN(MATCH_NOMATCH);
                   4379:               }
                   4380:             GETCHARINCTEST(c, eptr);
                   4381:             prop_category = UCD_CATEGORY(c);
                   4382:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4383:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4384:                    == prop_fail_result)
                   4385:               MRRETURN(MATCH_NOMATCH);
                   4386:             }
                   4387:           /* Control never gets here */
                   4388: 
                   4389:           case PT_WORD:
                   4390:           for (fi = min;; fi++)
                   4391:             {
                   4392:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
                   4393:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4394:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4395:             if (eptr >= md->end_subject)
                   4396:               {
                   4397:               SCHECK_PARTIAL();
                   4398:               MRRETURN(MATCH_NOMATCH);
                   4399:               }
                   4400:             GETCHARINCTEST(c, eptr);
                   4401:             prop_category = UCD_CATEGORY(c);
                   4402:             if ((prop_category == ucp_L ||
                   4403:                  prop_category == ucp_N ||
                   4404:                  c == CHAR_UNDERSCORE)
                   4405:                    == prop_fail_result)
                   4406:               MRRETURN(MATCH_NOMATCH);
                   4407:             }
                   4408:           /* Control never gets here */
                   4409: 
                   4410:           /* This should never occur */
                   4411: 
                   4412:           default:
                   4413:           RRETURN(PCRE_ERROR_INTERNAL);
                   4414:           }
                   4415:         }
                   4416: 
                   4417:       /* Match extended Unicode sequences. We will get here only if the
                   4418:       support is in the binary; otherwise a compile-time error occurs. */
                   4419: 
                   4420:       else if (ctype == OP_EXTUNI)
                   4421:         {
                   4422:         for (fi = min;; fi++)
                   4423:           {
                   4424:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
                   4425:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4426:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4427:           if (eptr >= md->end_subject)
                   4428:             {
                   4429:             SCHECK_PARTIAL();
                   4430:             MRRETURN(MATCH_NOMATCH);
                   4431:             }
                   4432:           GETCHARINCTEST(c, eptr);
                   4433:           prop_category = UCD_CATEGORY(c);
                   4434:           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
                   4435:           while (eptr < md->end_subject)
                   4436:             {
                   4437:             int len = 1;
                   4438:             if (!utf8) c = *eptr;
                   4439:               else { GETCHARLEN(c, eptr, len); }
                   4440:             prop_category = UCD_CATEGORY(c);
                   4441:             if (prop_category != ucp_M) break;
                   4442:             eptr += len;
                   4443:             }
                   4444:           }
                   4445:         }
                   4446: 
                   4447:       else
                   4448: #endif     /* SUPPORT_UCP */
                   4449: 
                   4450: #ifdef SUPPORT_UTF8
                   4451:       /* UTF-8 mode */
                   4452:       if (utf8)
                   4453:         {
                   4454:         for (fi = min;; fi++)
                   4455:           {
                   4456:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
                   4457:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4458:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4459:           if (eptr >= md->end_subject)
                   4460:             {
                   4461:             SCHECK_PARTIAL();
                   4462:             MRRETURN(MATCH_NOMATCH);
                   4463:             }
                   4464:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   4465:             MRRETURN(MATCH_NOMATCH);
                   4466:           GETCHARINC(c, eptr);
                   4467:           switch(ctype)
                   4468:             {
                   4469:             case OP_ANY:        /* This is the non-NL case */
                   4470:             case OP_ALLANY:
                   4471:             case OP_ANYBYTE:
                   4472:             break;
                   4473: 
                   4474:             case OP_ANYNL:
                   4475:             switch(c)
                   4476:               {
                   4477:               default: MRRETURN(MATCH_NOMATCH);
                   4478:               case 0x000d:
                   4479:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4480:               break;
                   4481:               case 0x000a:
                   4482:               break;
                   4483: 
                   4484:               case 0x000b:
                   4485:               case 0x000c:
                   4486:               case 0x0085:
                   4487:               case 0x2028:
                   4488:               case 0x2029:
                   4489:               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
                   4490:               break;
                   4491:               }
                   4492:             break;
                   4493: 
                   4494:             case OP_NOT_HSPACE:
                   4495:             switch(c)
                   4496:               {
                   4497:               default: break;
                   4498:               case 0x09:      /* HT */
                   4499:               case 0x20:      /* SPACE */
                   4500:               case 0xa0:      /* NBSP */
                   4501:               case 0x1680:    /* OGHAM SPACE MARK */
                   4502:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4503:               case 0x2000:    /* EN QUAD */
                   4504:               case 0x2001:    /* EM QUAD */
                   4505:               case 0x2002:    /* EN SPACE */
                   4506:               case 0x2003:    /* EM SPACE */
                   4507:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4508:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4509:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4510:               case 0x2007:    /* FIGURE SPACE */
                   4511:               case 0x2008:    /* PUNCTUATION SPACE */
                   4512:               case 0x2009:    /* THIN SPACE */
                   4513:               case 0x200A:    /* HAIR SPACE */
                   4514:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4515:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4516:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4517:               MRRETURN(MATCH_NOMATCH);
                   4518:               }
                   4519:             break;
                   4520: 
                   4521:             case OP_HSPACE:
                   4522:             switch(c)
                   4523:               {
                   4524:               default: MRRETURN(MATCH_NOMATCH);
                   4525:               case 0x09:      /* HT */
                   4526:               case 0x20:      /* SPACE */
                   4527:               case 0xa0:      /* NBSP */
                   4528:               case 0x1680:    /* OGHAM SPACE MARK */
                   4529:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4530:               case 0x2000:    /* EN QUAD */
                   4531:               case 0x2001:    /* EM QUAD */
                   4532:               case 0x2002:    /* EN SPACE */
                   4533:               case 0x2003:    /* EM SPACE */
                   4534:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4535:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4536:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4537:               case 0x2007:    /* FIGURE SPACE */
                   4538:               case 0x2008:    /* PUNCTUATION SPACE */
                   4539:               case 0x2009:    /* THIN SPACE */
                   4540:               case 0x200A:    /* HAIR SPACE */
                   4541:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4542:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4543:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4544:               break;
                   4545:               }
                   4546:             break;
                   4547: 
                   4548:             case OP_NOT_VSPACE:
                   4549:             switch(c)
                   4550:               {
                   4551:               default: break;
                   4552:               case 0x0a:      /* LF */
                   4553:               case 0x0b:      /* VT */
                   4554:               case 0x0c:      /* FF */
                   4555:               case 0x0d:      /* CR */
                   4556:               case 0x85:      /* NEL */
                   4557:               case 0x2028:    /* LINE SEPARATOR */
                   4558:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4559:               MRRETURN(MATCH_NOMATCH);
                   4560:               }
                   4561:             break;
                   4562: 
                   4563:             case OP_VSPACE:
                   4564:             switch(c)
                   4565:               {
                   4566:               default: MRRETURN(MATCH_NOMATCH);
                   4567:               case 0x0a:      /* LF */
                   4568:               case 0x0b:      /* VT */
                   4569:               case 0x0c:      /* FF */
                   4570:               case 0x0d:      /* CR */
                   4571:               case 0x85:      /* NEL */
                   4572:               case 0x2028:    /* LINE SEPARATOR */
                   4573:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4574:               break;
                   4575:               }
                   4576:             break;
                   4577: 
                   4578:             case OP_NOT_DIGIT:
                   4579:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
                   4580:               MRRETURN(MATCH_NOMATCH);
                   4581:             break;
                   4582: 
                   4583:             case OP_DIGIT:
                   4584:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
                   4585:               MRRETURN(MATCH_NOMATCH);
                   4586:             break;
                   4587: 
                   4588:             case OP_NOT_WHITESPACE:
                   4589:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
                   4590:               MRRETURN(MATCH_NOMATCH);
                   4591:             break;
                   4592: 
                   4593:             case OP_WHITESPACE:
                   4594:             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
                   4595:               MRRETURN(MATCH_NOMATCH);
                   4596:             break;
                   4597: 
                   4598:             case OP_NOT_WORDCHAR:
                   4599:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
                   4600:               MRRETURN(MATCH_NOMATCH);
                   4601:             break;
                   4602: 
                   4603:             case OP_WORDCHAR:
                   4604:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
                   4605:               MRRETURN(MATCH_NOMATCH);
                   4606:             break;
                   4607: 
                   4608:             default:
                   4609:             RRETURN(PCRE_ERROR_INTERNAL);
                   4610:             }
                   4611:           }
                   4612:         }
                   4613:       else
                   4614: #endif
                   4615:       /* Not UTF-8 mode */
                   4616:         {
                   4617:         for (fi = min;; fi++)
                   4618:           {
                   4619:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
                   4620:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4621:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4622:           if (eptr >= md->end_subject)
                   4623:             {
                   4624:             SCHECK_PARTIAL();
                   4625:             MRRETURN(MATCH_NOMATCH);
                   4626:             }
                   4627:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   4628:             MRRETURN(MATCH_NOMATCH);
                   4629:           c = *eptr++;
                   4630:           switch(ctype)
                   4631:             {
                   4632:             case OP_ANY:     /* This is the non-NL case */
                   4633:             case OP_ALLANY:
                   4634:             case OP_ANYBYTE:
                   4635:             break;
                   4636: 
                   4637:             case OP_ANYNL:
                   4638:             switch(c)
                   4639:               {
                   4640:               default: MRRETURN(MATCH_NOMATCH);
                   4641:               case 0x000d:
                   4642:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4643:               break;
                   4644: 
                   4645:               case 0x000a:
                   4646:               break;
                   4647: 
                   4648:               case 0x000b:
                   4649:               case 0x000c:
                   4650:               case 0x0085:
                   4651:               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
                   4652:               break;
                   4653:               }
                   4654:             break;
                   4655: 
                   4656:             case OP_NOT_HSPACE:
                   4657:             switch(c)
                   4658:               {
                   4659:               default: break;
                   4660:               case 0x09:      /* HT */
                   4661:               case 0x20:      /* SPACE */
                   4662:               case 0xa0:      /* NBSP */
                   4663:               MRRETURN(MATCH_NOMATCH);
                   4664:               }
                   4665:             break;
                   4666: 
                   4667:             case OP_HSPACE:
                   4668:             switch(c)
                   4669:               {
                   4670:               default: MRRETURN(MATCH_NOMATCH);
                   4671:               case 0x09:      /* HT */
                   4672:               case 0x20:      /* SPACE */
                   4673:               case 0xa0:      /* NBSP */
                   4674:               break;
                   4675:               }
                   4676:             break;
                   4677: 
                   4678:             case OP_NOT_VSPACE:
                   4679:             switch(c)
                   4680:               {
                   4681:               default: break;
                   4682:               case 0x0a:      /* LF */
                   4683:               case 0x0b:      /* VT */
                   4684:               case 0x0c:      /* FF */
                   4685:               case 0x0d:      /* CR */
                   4686:               case 0x85:      /* NEL */
                   4687:               MRRETURN(MATCH_NOMATCH);
                   4688:               }
                   4689:             break;
                   4690: 
                   4691:             case OP_VSPACE:
                   4692:             switch(c)
                   4693:               {
                   4694:               default: MRRETURN(MATCH_NOMATCH);
                   4695:               case 0x0a:      /* LF */
                   4696:               case 0x0b:      /* VT */
                   4697:               case 0x0c:      /* FF */
                   4698:               case 0x0d:      /* CR */
                   4699:               case 0x85:      /* NEL */
                   4700:               break;
                   4701:               }
                   4702:             break;
                   4703: 
                   4704:             case OP_NOT_DIGIT:
                   4705:             if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
                   4706:             break;
                   4707: 
                   4708:             case OP_DIGIT:
                   4709:             if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
                   4710:             break;
                   4711: 
                   4712:             case OP_NOT_WHITESPACE:
                   4713:             if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
                   4714:             break;
                   4715: 
                   4716:             case OP_WHITESPACE:
                   4717:             if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
                   4718:             break;
                   4719: 
                   4720:             case OP_NOT_WORDCHAR:
                   4721:             if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
                   4722:             break;
                   4723: 
                   4724:             case OP_WORDCHAR:
                   4725:             if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
                   4726:             break;
                   4727: 
                   4728:             default:
                   4729:             RRETURN(PCRE_ERROR_INTERNAL);
                   4730:             }
                   4731:           }
                   4732:         }
                   4733:       /* Control never gets here */
                   4734:       }
                   4735: 
                   4736:     /* If maximizing, it is worth using inline code for speed, doing the type
                   4737:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   4738:     UTF-8 and UCP stuff separate. */
                   4739: 
                   4740:     else
                   4741:       {
                   4742:       pp = eptr;  /* Remember where we started */
                   4743: 
                   4744: #ifdef SUPPORT_UCP
                   4745:       if (prop_type >= 0)
                   4746:         {
                   4747:         switch(prop_type)
                   4748:           {
                   4749:           case PT_ANY:
                   4750:           for (i = min; i < max; i++)
                   4751:             {
                   4752:             int len = 1;
                   4753:             if (eptr >= md->end_subject)
                   4754:               {
                   4755:               SCHECK_PARTIAL();
                   4756:               break;
                   4757:               }
                   4758:             GETCHARLENTEST(c, eptr, len);
                   4759:             if (prop_fail_result) break;
                   4760:             eptr+= len;
                   4761:             }
                   4762:           break;
                   4763: 
                   4764:           case PT_LAMP:
                   4765:           for (i = min; i < max; i++)
                   4766:             {
                   4767:             int len = 1;
                   4768:             if (eptr >= md->end_subject)
                   4769:               {
                   4770:               SCHECK_PARTIAL();
                   4771:               break;
                   4772:               }
                   4773:             GETCHARLENTEST(c, eptr, len);
                   4774:             prop_chartype = UCD_CHARTYPE(c);
                   4775:             if ((prop_chartype == ucp_Lu ||
                   4776:                  prop_chartype == ucp_Ll ||
                   4777:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   4778:               break;
                   4779:             eptr+= len;
                   4780:             }
                   4781:           break;
                   4782: 
                   4783:           case PT_GC:
                   4784:           for (i = min; i < max; i++)
                   4785:             {
                   4786:             int len = 1;
                   4787:             if (eptr >= md->end_subject)
                   4788:               {
                   4789:               SCHECK_PARTIAL();
                   4790:               break;
                   4791:               }
                   4792:             GETCHARLENTEST(c, eptr, len);
                   4793:             prop_category = UCD_CATEGORY(c);
                   4794:             if ((prop_category == prop_value) == prop_fail_result)
                   4795:               break;
                   4796:             eptr+= len;
                   4797:             }
                   4798:           break;
                   4799: 
                   4800:           case PT_PC:
                   4801:           for (i = min; i < max; i++)
                   4802:             {
                   4803:             int len = 1;
                   4804:             if (eptr >= md->end_subject)
                   4805:               {
                   4806:               SCHECK_PARTIAL();
                   4807:               break;
                   4808:               }
                   4809:             GETCHARLENTEST(c, eptr, len);
                   4810:             prop_chartype = UCD_CHARTYPE(c);
                   4811:             if ((prop_chartype == prop_value) == prop_fail_result)
                   4812:               break;
                   4813:             eptr+= len;
                   4814:             }
                   4815:           break;
                   4816: 
                   4817:           case PT_SC:
                   4818:           for (i = min; i < max; i++)
                   4819:             {
                   4820:             int len = 1;
                   4821:             if (eptr >= md->end_subject)
                   4822:               {
                   4823:               SCHECK_PARTIAL();
                   4824:               break;
                   4825:               }
                   4826:             GETCHARLENTEST(c, eptr, len);
                   4827:             prop_script = UCD_SCRIPT(c);
                   4828:             if ((prop_script == prop_value) == prop_fail_result)
                   4829:               break;
                   4830:             eptr+= len;
                   4831:             }
                   4832:           break;
                   4833: 
                   4834:           case PT_ALNUM:
                   4835:           for (i = min; i < max; i++)
                   4836:             {
                   4837:             int len = 1;
                   4838:             if (eptr >= md->end_subject)
                   4839:               {
                   4840:               SCHECK_PARTIAL();
                   4841:               break;
                   4842:               }
                   4843:             GETCHARLENTEST(c, eptr, len);
                   4844:             prop_category = UCD_CATEGORY(c);
                   4845:             if ((prop_category == ucp_L || prop_category == ucp_N)
                   4846:                  == prop_fail_result)
                   4847:               break;
                   4848:             eptr+= len;
                   4849:             }
                   4850:           break;
                   4851: 
                   4852:           case PT_SPACE:    /* Perl space */
                   4853:           for (i = min; i < max; i++)
                   4854:             {
                   4855:             int len = 1;
                   4856:             if (eptr >= md->end_subject)
                   4857:               {
                   4858:               SCHECK_PARTIAL();
                   4859:               break;
                   4860:               }
                   4861:             GETCHARLENTEST(c, eptr, len);
                   4862:             prop_category = UCD_CATEGORY(c);
                   4863:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4864:                  c == CHAR_FF || c == CHAR_CR)
                   4865:                  == prop_fail_result)
                   4866:               break;
                   4867:             eptr+= len;
                   4868:             }
                   4869:           break;
                   4870: 
                   4871:           case PT_PXSPACE:  /* POSIX space */
                   4872:           for (i = min; i < max; i++)
                   4873:             {
                   4874:             int len = 1;
                   4875:             if (eptr >= md->end_subject)
                   4876:               {
                   4877:               SCHECK_PARTIAL();
                   4878:               break;
                   4879:               }
                   4880:             GETCHARLENTEST(c, eptr, len);
                   4881:             prop_category = UCD_CATEGORY(c);
                   4882:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4883:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4884:                  == prop_fail_result)
                   4885:               break;
                   4886:             eptr+= len;
                   4887:             }
                   4888:           break;
                   4889: 
                   4890:           case PT_WORD:
                   4891:           for (i = min; i < max; i++)
                   4892:             {
                   4893:             int len = 1;
                   4894:             if (eptr >= md->end_subject)
                   4895:               {
                   4896:               SCHECK_PARTIAL();
                   4897:               break;
                   4898:               }
                   4899:             GETCHARLENTEST(c, eptr, len);
                   4900:             prop_category = UCD_CATEGORY(c);
                   4901:             if ((prop_category == ucp_L || prop_category == ucp_N ||
                   4902:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   4903:               break;
                   4904:             eptr+= len;
                   4905:             }
                   4906:           break;
                   4907: 
                   4908:           default:
                   4909:           RRETURN(PCRE_ERROR_INTERNAL);
                   4910:           }
                   4911: 
                   4912:         /* eptr is now past the end of the maximum run */
                   4913: 
                   4914:         if (possessive) continue;
                   4915:         for(;;)
                   4916:           {
                   4917:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
                   4918:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4919:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4920:           if (utf8) BACKCHAR(eptr);
                   4921:           }
                   4922:         }
                   4923: 
                   4924:       /* Match extended Unicode sequences. We will get here only if the
                   4925:       support is in the binary; otherwise a compile-time error occurs. */
                   4926: 
                   4927:       else if (ctype == OP_EXTUNI)
                   4928:         {
                   4929:         for (i = min; i < max; i++)
                   4930:           {
                   4931:           if (eptr >= md->end_subject)
                   4932:             {
                   4933:             SCHECK_PARTIAL();
                   4934:             break;
                   4935:             }
                   4936:           GETCHARINCTEST(c, eptr);
                   4937:           prop_category = UCD_CATEGORY(c);
                   4938:           if (prop_category == ucp_M) break;
                   4939:           while (eptr < md->end_subject)
                   4940:             {
                   4941:             int len = 1;
                   4942:             if (!utf8) c = *eptr; else
                   4943:               {
                   4944:               GETCHARLEN(c, eptr, len);
                   4945:               }
                   4946:             prop_category = UCD_CATEGORY(c);
                   4947:             if (prop_category != ucp_M) break;
                   4948:             eptr += len;
                   4949:             }
                   4950:           }
                   4951: 
                   4952:         /* eptr is now past the end of the maximum run */
                   4953: 
                   4954:         if (possessive) continue;
                   4955: 
                   4956:         for(;;)
                   4957:           {
                   4958:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
                   4959:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4960:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4961:           for (;;)                        /* Move back over one extended */
                   4962:             {
                   4963:             int len = 1;
                   4964:             if (!utf8) c = *eptr; else
                   4965:               {
                   4966:               BACKCHAR(eptr);
                   4967:               GETCHARLEN(c, eptr, len);
                   4968:               }
                   4969:             prop_category = UCD_CATEGORY(c);
                   4970:             if (prop_category != ucp_M) break;
                   4971:             eptr--;
                   4972:             }
                   4973:           }
                   4974:         }
                   4975: 
                   4976:       else
                   4977: #endif   /* SUPPORT_UCP */
                   4978: 
                   4979: #ifdef SUPPORT_UTF8
                   4980:       /* UTF-8 mode */
                   4981: 
                   4982:       if (utf8)
                   4983:         {
                   4984:         switch(ctype)
                   4985:           {
                   4986:           case OP_ANY:
                   4987:           if (max < INT_MAX)
                   4988:             {
                   4989:             for (i = min; i < max; i++)
                   4990:               {
                   4991:               if (eptr >= md->end_subject)
                   4992:                 {
                   4993:                 SCHECK_PARTIAL();
                   4994:                 break;
                   4995:                 }
                   4996:               if (IS_NEWLINE(eptr)) break;
                   4997:               eptr++;
                   4998:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   4999:               }
                   5000:             }
                   5001: 
                   5002:           /* Handle unlimited UTF-8 repeat */
                   5003: 
                   5004:           else
                   5005:             {
                   5006:             for (i = min; i < max; i++)
                   5007:               {
                   5008:               if (eptr >= md->end_subject)
                   5009:                 {
                   5010:                 SCHECK_PARTIAL();
                   5011:                 break;
                   5012:                 }
                   5013:               if (IS_NEWLINE(eptr)) break;
                   5014:               eptr++;
                   5015:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5016:               }
                   5017:             }
                   5018:           break;
                   5019: 
                   5020:           case OP_ALLANY:
                   5021:           if (max < INT_MAX)
                   5022:             {
                   5023:             for (i = min; i < max; i++)
                   5024:               {
                   5025:               if (eptr >= md->end_subject)
                   5026:                 {
                   5027:                 SCHECK_PARTIAL();
                   5028:                 break;
                   5029:                 }
                   5030:               eptr++;
                   5031:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5032:               }
                   5033:             }
                   5034:           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5035:           break;
                   5036: 
                   5037:           /* The byte case is the same as non-UTF8 */
                   5038: 
                   5039:           case OP_ANYBYTE:
                   5040:           c = max - min;
                   5041:           if (c > (unsigned int)(md->end_subject - eptr))
                   5042:             {
                   5043:             eptr = md->end_subject;
                   5044:             SCHECK_PARTIAL();
                   5045:             }
                   5046:           else eptr += c;
                   5047:           break;
                   5048: 
                   5049:           case OP_ANYNL:
                   5050:           for (i = min; i < max; i++)
                   5051:             {
                   5052:             int len = 1;
                   5053:             if (eptr >= md->end_subject)
                   5054:               {
                   5055:               SCHECK_PARTIAL();
                   5056:               break;
                   5057:               }
                   5058:             GETCHARLEN(c, eptr, len);
                   5059:             if (c == 0x000d)
                   5060:               {
                   5061:               if (++eptr >= md->end_subject) break;
                   5062:               if (*eptr == 0x000a) eptr++;
                   5063:               }
                   5064:             else
                   5065:               {
                   5066:               if (c != 0x000a &&
                   5067:                   (md->bsr_anycrlf ||
                   5068:                    (c != 0x000b && c != 0x000c &&
                   5069:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   5070:                 break;
                   5071:               eptr += len;
                   5072:               }
                   5073:             }
                   5074:           break;
                   5075: 
                   5076:           case OP_NOT_HSPACE:
                   5077:           case OP_HSPACE:
                   5078:           for (i = min; i < max; i++)
                   5079:             {
                   5080:             BOOL gotspace;
                   5081:             int len = 1;
                   5082:             if (eptr >= md->end_subject)
                   5083:               {
                   5084:               SCHECK_PARTIAL();
                   5085:               break;
                   5086:               }
                   5087:             GETCHARLEN(c, eptr, len);
                   5088:             switch(c)
                   5089:               {
                   5090:               default: gotspace = FALSE; break;
                   5091:               case 0x09:      /* HT */
                   5092:               case 0x20:      /* SPACE */
                   5093:               case 0xa0:      /* NBSP */
                   5094:               case 0x1680:    /* OGHAM SPACE MARK */
                   5095:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5096:               case 0x2000:    /* EN QUAD */
                   5097:               case 0x2001:    /* EM QUAD */
                   5098:               case 0x2002:    /* EN SPACE */
                   5099:               case 0x2003:    /* EM SPACE */
                   5100:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5101:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5102:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5103:               case 0x2007:    /* FIGURE SPACE */
                   5104:               case 0x2008:    /* PUNCTUATION SPACE */
                   5105:               case 0x2009:    /* THIN SPACE */
                   5106:               case 0x200A:    /* HAIR SPACE */
                   5107:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5108:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5109:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5110:               gotspace = TRUE;
                   5111:               break;
                   5112:               }
                   5113:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5114:             eptr += len;
                   5115:             }
                   5116:           break;
                   5117: 
                   5118:           case OP_NOT_VSPACE:
                   5119:           case OP_VSPACE:
                   5120:           for (i = min; i < max; i++)
                   5121:             {
                   5122:             BOOL gotspace;
                   5123:             int len = 1;
                   5124:             if (eptr >= md->end_subject)
                   5125:               {
                   5126:               SCHECK_PARTIAL();
                   5127:               break;
                   5128:               }
                   5129:             GETCHARLEN(c, eptr, len);
                   5130:             switch(c)
                   5131:               {
                   5132:               default: gotspace = FALSE; break;
                   5133:               case 0x0a:      /* LF */
                   5134:               case 0x0b:      /* VT */
                   5135:               case 0x0c:      /* FF */
                   5136:               case 0x0d:      /* CR */
                   5137:               case 0x85:      /* NEL */
                   5138:               case 0x2028:    /* LINE SEPARATOR */
                   5139:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5140:               gotspace = TRUE;
                   5141:               break;
                   5142:               }
                   5143:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5144:             eptr += len;
                   5145:             }
                   5146:           break;
                   5147: 
                   5148:           case OP_NOT_DIGIT:
                   5149:           for (i = min; i < max; i++)
                   5150:             {
                   5151:             int len = 1;
                   5152:             if (eptr >= md->end_subject)
                   5153:               {
                   5154:               SCHECK_PARTIAL();
                   5155:               break;
                   5156:               }
                   5157:             GETCHARLEN(c, eptr, len);
                   5158:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5159:             eptr+= len;
                   5160:             }
                   5161:           break;
                   5162: 
                   5163:           case OP_DIGIT:
                   5164:           for (i = min; i < max; i++)
                   5165:             {
                   5166:             int len = 1;
                   5167:             if (eptr >= md->end_subject)
                   5168:               {
                   5169:               SCHECK_PARTIAL();
                   5170:               break;
                   5171:               }
                   5172:             GETCHARLEN(c, eptr, len);
                   5173:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5174:             eptr+= len;
                   5175:             }
                   5176:           break;
                   5177: 
                   5178:           case OP_NOT_WHITESPACE:
                   5179:           for (i = min; i < max; i++)
                   5180:             {
                   5181:             int len = 1;
                   5182:             if (eptr >= md->end_subject)
                   5183:               {
                   5184:               SCHECK_PARTIAL();
                   5185:               break;
                   5186:               }
                   5187:             GETCHARLEN(c, eptr, len);
                   5188:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5189:             eptr+= len;
                   5190:             }
                   5191:           break;
                   5192: 
                   5193:           case OP_WHITESPACE:
                   5194:           for (i = min; i < max; i++)
                   5195:             {
                   5196:             int len = 1;
                   5197:             if (eptr >= md->end_subject)
                   5198:               {
                   5199:               SCHECK_PARTIAL();
                   5200:               break;
                   5201:               }
                   5202:             GETCHARLEN(c, eptr, len);
                   5203:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5204:             eptr+= len;
                   5205:             }
                   5206:           break;
                   5207: 
                   5208:           case OP_NOT_WORDCHAR:
                   5209:           for (i = min; i < max; i++)
                   5210:             {
                   5211:             int len = 1;
                   5212:             if (eptr >= md->end_subject)
                   5213:               {
                   5214:               SCHECK_PARTIAL();
                   5215:               break;
                   5216:               }
                   5217:             GETCHARLEN(c, eptr, len);
                   5218:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5219:             eptr+= len;
                   5220:             }
                   5221:           break;
                   5222: 
                   5223:           case OP_WORDCHAR:
                   5224:           for (i = min; i < max; i++)
                   5225:             {
                   5226:             int len = 1;
                   5227:             if (eptr >= md->end_subject)
                   5228:               {
                   5229:               SCHECK_PARTIAL();
                   5230:               break;
                   5231:               }
                   5232:             GETCHARLEN(c, eptr, len);
                   5233:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5234:             eptr+= len;
                   5235:             }
                   5236:           break;
                   5237: 
                   5238:           default:
                   5239:           RRETURN(PCRE_ERROR_INTERNAL);
                   5240:           }
                   5241: 
                   5242:         /* eptr is now past the end of the maximum run */
                   5243: 
                   5244:         if (possessive) continue;
                   5245:         for(;;)
                   5246:           {
                   5247:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
                   5248:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5249:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5250:           BACKCHAR(eptr);
                   5251:           }
                   5252:         }
                   5253:       else
                   5254: #endif  /* SUPPORT_UTF8 */
                   5255: 
                   5256:       /* Not UTF-8 mode */
                   5257:         {
                   5258:         switch(ctype)
                   5259:           {
                   5260:           case OP_ANY:
                   5261:           for (i = min; i < max; i++)
                   5262:             {
                   5263:             if (eptr >= md->end_subject)
                   5264:               {
                   5265:               SCHECK_PARTIAL();
                   5266:               break;
                   5267:               }
                   5268:             if (IS_NEWLINE(eptr)) break;
                   5269:             eptr++;
                   5270:             }
                   5271:           break;
                   5272: 
                   5273:           case OP_ALLANY:
                   5274:           case OP_ANYBYTE:
                   5275:           c = max - min;
                   5276:           if (c > (unsigned int)(md->end_subject - eptr))
                   5277:             {
                   5278:             eptr = md->end_subject;
                   5279:             SCHECK_PARTIAL();
                   5280:             }
                   5281:           else eptr += c;
                   5282:           break;
                   5283: 
                   5284:           case OP_ANYNL:
                   5285:           for (i = min; i < max; i++)
                   5286:             {
                   5287:             if (eptr >= md->end_subject)
                   5288:               {
                   5289:               SCHECK_PARTIAL();
                   5290:               break;
                   5291:               }
                   5292:             c = *eptr;
                   5293:             if (c == 0x000d)
                   5294:               {
                   5295:               if (++eptr >= md->end_subject) break;
                   5296:               if (*eptr == 0x000a) eptr++;
                   5297:               }
                   5298:             else
                   5299:               {
                   5300:               if (c != 0x000a &&
                   5301:                   (md->bsr_anycrlf ||
                   5302:                     (c != 0x000b && c != 0x000c && c != 0x0085)))
                   5303:                 break;
                   5304:               eptr++;
                   5305:               }
                   5306:             }
                   5307:           break;
                   5308: 
                   5309:           case OP_NOT_HSPACE:
                   5310:           for (i = min; i < max; i++)
                   5311:             {
                   5312:             if (eptr >= md->end_subject)
                   5313:               {
                   5314:               SCHECK_PARTIAL();
                   5315:               break;
                   5316:               }
                   5317:             c = *eptr;
                   5318:             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
                   5319:             eptr++;
                   5320:             }
                   5321:           break;
                   5322: 
                   5323:           case OP_HSPACE:
                   5324:           for (i = min; i < max; i++)
                   5325:             {
                   5326:             if (eptr >= md->end_subject)
                   5327:               {
                   5328:               SCHECK_PARTIAL();
                   5329:               break;
                   5330:               }
                   5331:             c = *eptr;
                   5332:             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
                   5333:             eptr++;
                   5334:             }
                   5335:           break;
                   5336: 
                   5337:           case OP_NOT_VSPACE:
                   5338:           for (i = min; i < max; i++)
                   5339:             {
                   5340:             if (eptr >= md->end_subject)
                   5341:               {
                   5342:               SCHECK_PARTIAL();
                   5343:               break;
                   5344:               }
                   5345:             c = *eptr;
                   5346:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
                   5347:               break;
                   5348:             eptr++;
                   5349:             }
                   5350:           break;
                   5351: 
                   5352:           case OP_VSPACE:
                   5353:           for (i = min; i < max; i++)
                   5354:             {
                   5355:             if (eptr >= md->end_subject)
                   5356:               {
                   5357:               SCHECK_PARTIAL();
                   5358:               break;
                   5359:               }
                   5360:             c = *eptr;
                   5361:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
                   5362:               break;
                   5363:             eptr++;
                   5364:             }
                   5365:           break;
                   5366: 
                   5367:           case OP_NOT_DIGIT:
                   5368:           for (i = min; i < max; i++)
                   5369:             {
                   5370:             if (eptr >= md->end_subject)
                   5371:               {
                   5372:               SCHECK_PARTIAL();
                   5373:               break;
                   5374:               }
                   5375:             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
                   5376:             eptr++;
                   5377:             }
                   5378:           break;
                   5379: 
                   5380:           case OP_DIGIT:
                   5381:           for (i = min; i < max; i++)
                   5382:             {
                   5383:             if (eptr >= md->end_subject)
                   5384:               {
                   5385:               SCHECK_PARTIAL();
                   5386:               break;
                   5387:               }
                   5388:             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
                   5389:             eptr++;
                   5390:             }
                   5391:           break;
                   5392: 
                   5393:           case OP_NOT_WHITESPACE:
                   5394:           for (i = min; i < max; i++)
                   5395:             {
                   5396:             if (eptr >= md->end_subject)
                   5397:               {
                   5398:               SCHECK_PARTIAL();
                   5399:               break;
                   5400:               }
                   5401:             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
                   5402:             eptr++;
                   5403:             }
                   5404:           break;
                   5405: 
                   5406:           case OP_WHITESPACE:
                   5407:           for (i = min; i < max; i++)
                   5408:             {
                   5409:             if (eptr >= md->end_subject)
                   5410:               {
                   5411:               SCHECK_PARTIAL();
                   5412:               break;
                   5413:               }
                   5414:             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
                   5415:             eptr++;
                   5416:             }
                   5417:           break;
                   5418: 
                   5419:           case OP_NOT_WORDCHAR:
                   5420:           for (i = min; i < max; i++)
                   5421:             {
                   5422:             if (eptr >= md->end_subject)
                   5423:               {
                   5424:               SCHECK_PARTIAL();
                   5425:               break;
                   5426:               }
                   5427:             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
                   5428:             eptr++;
                   5429:             }
                   5430:           break;
                   5431: 
                   5432:           case OP_WORDCHAR:
                   5433:           for (i = min; i < max; i++)
                   5434:             {
                   5435:             if (eptr >= md->end_subject)
                   5436:               {
                   5437:               SCHECK_PARTIAL();
                   5438:               break;
                   5439:               }
                   5440:             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
                   5441:             eptr++;
                   5442:             }
                   5443:           break;
                   5444: 
                   5445:           default:
                   5446:           RRETURN(PCRE_ERROR_INTERNAL);
                   5447:           }
                   5448: 
                   5449:         /* eptr is now past the end of the maximum run */
                   5450: 
                   5451:         if (possessive) continue;
                   5452:         while (eptr >= pp)
                   5453:           {
                   5454:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
                   5455:           eptr--;
                   5456:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5457:           }
                   5458:         }
                   5459: 
                   5460:       /* Get here if we can't make it match with any permitted repetitions */
                   5461: 
                   5462:       MRRETURN(MATCH_NOMATCH);
                   5463:       }
                   5464:     /* Control never gets here */
                   5465: 
                   5466:     /* There's been some horrible disaster. Arrival here can only mean there is
                   5467:     something seriously wrong in the code above or the OP_xxx definitions. */
                   5468: 
                   5469:     default:
                   5470:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   5471:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   5472:     }
                   5473: 
                   5474:   /* Do not stick any code in here without much thought; it is assumed
                   5475:   that "continue" in the code above comes out to here to repeat the main
                   5476:   loop. */
                   5477: 
                   5478:   }             /* End of main loop */
                   5479: /* Control never reaches here */
                   5480: 
                   5481: 
                   5482: /* When compiling to use the heap rather than the stack for recursive calls to
                   5483: match(), the RRETURN() macro jumps here. The number that is saved in
                   5484: frame->Xwhere indicates which label we actually want to return to. */
                   5485: 
                   5486: #ifdef NO_RECURSE
                   5487: #define LBL(val) case val: goto L_RM##val;
                   5488: HEAP_RETURN:
                   5489: switch (frame->Xwhere)
                   5490:   {
                   5491:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   5492:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   5493:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   5494:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
                   5495:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
                   5496: #ifdef SUPPORT_UTF8
                   5497:   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
                   5498:   LBL(32) LBL(34) LBL(42) LBL(46)
                   5499: #ifdef SUPPORT_UCP
                   5500:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
                   5501:   LBL(59) LBL(60) LBL(61) LBL(62)
                   5502: #endif  /* SUPPORT_UCP */
                   5503: #endif  /* SUPPORT_UTF8 */
                   5504:   default:
                   5505:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   5506:   return PCRE_ERROR_INTERNAL;
                   5507:   }
                   5508: #undef LBL
                   5509: #endif  /* NO_RECURSE */
                   5510: }
                   5511: 
                   5512: 
                   5513: /***************************************************************************
                   5514: ****************************************************************************
                   5515:                    RECURSION IN THE match() FUNCTION
                   5516: 
                   5517: Undefine all the macros that were defined above to handle this. */
                   5518: 
                   5519: #ifdef NO_RECURSE  /* heap-recursion build: the names below are macros here */
                   5520: #undef eptr
                   5521: #undef ecode
                   5522: #undef mstart
                   5523: #undef offset_top
                   5524: #undef ims  /* pcre_exec() below declares a real variable named ims */
                   5525: #undef eptrb
                   5526: #undef flags  /* pcre_exec() below declares a block-local named flags */
                   5527: 
                   5528: #undef callpat
                   5529: #undef charptr
                   5530: #undef data
                   5531: #undef next
                   5532: #undef pp
                   5533: #undef prev
                   5534: #undef saved_eptr
                   5535: 
                   5536: #undef new_recursive
                   5537: 
                   5538: #undef cur_is_word
                   5539: #undef condition
                   5540: #undef prev_is_word
                   5541: 
                   5542: #undef original_ims
                   5543: 
                   5544: #undef ctype
                   5545: #undef length  /* pcre_exec() below has a parameter named length */
                   5546: #undef max
                   5547: #undef min
                   5548: #undef number
                   5549: #undef offset
                   5550: #undef op
                   5551: #undef save_capture_last
                   5552: #undef save_offset1
                   5553: #undef save_offset2
                   5554: #undef save_offset3
                   5555: #undef stacksave
                   5556: 
                   5557: #undef newptrb
                   5558: 
                   5559: #endif  /* NO_RECURSE */
                   5560: 
                   5561: /* These two are macros whether or not NO_RECURSE is defined, so undefine both */
                   5562: 
                   5563: #undef fc
                   5564: #undef fi
                   5565: 
                   5566: /***************************************************************************
                   5567: ***************************************************************************/
                   5568: 
                   5569: 
                   5570: 
                   5571: /*************************************************
                   5572: *         Execute a Regular Expression           *
                   5573: *************************************************/
                   5574: 
                   5575: /* This function applies a compiled re to a subject string and picks out
                   5576: portions of the string if it matches. Two elements in the vector are set for
                   5577: each substring: the offsets to the start and end of the substring.
                   5578: 
                   5579: Arguments:
                   5580:   argument_re     points to the compiled expression
                   5581:   extra_data      points to extra data or is NULL
                   5582:   subject         points to the subject string
                   5583:   length          length of subject string (may contain binary zeros)
                   5584:   start_offset    where to start in the subject string
                   5585:   options         option bits
                   5586:   offsets         points to a vector of ints to be filled in with offsets
                   5587:   offsetcount     the number of elements in the vector
                   5588: 
                   5589: Returns:          > 0 => success; value is the number of elements filled in
                   5590:                   = 0 => success, but offsets is not big enough
                   5591:                    -1 => failed to match
                   5592:                  < -1 => some kind of unexpected problem
                   5593: */
                   5594: 
                   5595: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   5596: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   5597:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   5598:   int offsetcount)
                   5599: {
                   5600: int rc, resetcount, ocount;
                   5601: int first_byte = -1;
                   5602: int req_byte = -1;
                   5603: int req_byte2 = -1;
                   5604: int newline;
                   5605: unsigned long int ims;
                   5606: BOOL using_temporary_offsets = FALSE;
                   5607: BOOL anchored;
                   5608: BOOL startline;
                   5609: BOOL firstline;
                   5610: BOOL first_byte_caseless = FALSE;
                   5611: BOOL req_byte_caseless = FALSE;
                   5612: BOOL utf8;
                   5613: match_data match_block;
                   5614: match_data *md = &match_block;
                   5615: const uschar *tables;
                   5616: const uschar *start_bits = NULL;
                   5617: USPTR start_match = (USPTR)subject + start_offset;
                   5618: USPTR end_subject;
                   5619: USPTR start_partial = NULL;
                   5620: USPTR req_byte_ptr = start_match - 1;
                   5621: 
                   5622: pcre_study_data internal_study;
                   5623: const pcre_study_data *study;
                   5624: 
                   5625: real_pcre internal_re;
                   5626: const real_pcre *external_re = (const real_pcre *)argument_re;
                   5627: const real_pcre *re = external_re;
                   5628: 
                   5629: /* Plausibility checks */
                   5630: 
                   5631: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   5632: if (re == NULL || subject == NULL ||
                   5633:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   5634: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   5635: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   5636: 
                   5637: /* This information is for finding all the numbers associated with a given
                   5638: name, for condition testing. */
                   5639: 
                   5640: md->name_table = (uschar *)re + re->name_table_offset;
                   5641: md->name_count = re->name_count;
                   5642: md->name_entry_size = re->name_entry_size;
                   5643: 
                   5644: /* Fish out the optional data from the extra_data structure, first setting
                   5645: the default values. */
                   5646: 
                   5647: study = NULL;
                   5648: md->match_limit = MATCH_LIMIT;
                   5649: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   5650: md->callout_data = NULL;
                   5651: 
                   5652: /* The table pointer is always in native byte order. */
                   5653: 
                   5654: tables = external_re->tables;
                   5655: 
                   5656: if (extra_data != NULL)
                   5657:   {
                   5658:   register unsigned int flags = extra_data->flags;
                   5659:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   5660:     study = (const pcre_study_data *)extra_data->study_data;
                   5661:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   5662:     md->match_limit = extra_data->match_limit;
                   5663:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   5664:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   5665:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   5666:     md->callout_data = extra_data->callout_data;
                   5667:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   5668:   }
                   5669: 
                   5670: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   5671: is a feature that makes it possible to save compiled regex and re-use them
                   5672: in other programs later. */
                   5673: 
                   5674: if (tables == NULL) tables = _pcre_default_tables;
                   5675: 
                   5676: /* Check that the first field in the block is the magic number. If it is not,
                   5677: test for a regex that was compiled on a host of opposite endianness. If this is
                   5678: the case, flipped values are put in internal_re and internal_study if there was
                   5679: study data too. */
                   5680: 
                   5681: if (re->magic_number != MAGIC_NUMBER)
                   5682:   {
                   5683:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   5684:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   5685:   if (study != NULL) study = &internal_study;
                   5686:   }
                   5687: 
                   5688: /* Set up other data */
                   5689: 
                   5690: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   5691: startline = (re->flags & PCRE_STARTLINE) != 0;
                   5692: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   5693: 
                   5694: /* The code starts after the real_pcre block and the capture name table. */
                   5695: 
                   5696: md->start_code = (const uschar *)external_re + re->name_table_offset +
                   5697:   re->name_count * re->name_entry_size;
                   5698: 
                   5699: md->start_subject = (USPTR)subject;
                   5700: md->start_offset = start_offset;
                   5701: md->end_subject = md->start_subject + length;
                   5702: end_subject = md->end_subject;
                   5703: 
                   5704: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   5705: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
                   5706: md->use_ucp = (re->options & PCRE_UCP) != 0;
                   5707: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   5708: 
                   5709: md->notbol = (options & PCRE_NOTBOL) != 0;
                   5710: md->noteol = (options & PCRE_NOTEOL) != 0;
                   5711: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   5712: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
                   5713: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
                   5714:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
                   5715: md->hitend = FALSE;
                   5716: md->mark = NULL;                        /* In case never set */
                   5717: 
                   5718: md->recursive = NULL;                   /* No recursion at top level */
                   5719: 
                   5720: md->lcc = tables + lcc_offset;
                   5721: md->ctypes = tables + ctypes_offset;
                   5722: 
                   5723: /* Handle different \R options. */
                   5724: 
                   5725: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   5726:   {
                   5727:   case 0:
                   5728:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   5729:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   5730:   else
                   5731: #ifdef BSR_ANYCRLF
                   5732:   md->bsr_anycrlf = TRUE;
                   5733: #else
                   5734:   md->bsr_anycrlf = FALSE;
                   5735: #endif
                   5736:   break;
                   5737: 
                   5738:   case PCRE_BSR_ANYCRLF:
                   5739:   md->bsr_anycrlf = TRUE;
                   5740:   break;
                   5741: 
                   5742:   case PCRE_BSR_UNICODE:
                   5743:   md->bsr_anycrlf = FALSE;
                   5744:   break;
                   5745: 
                   5746:   default: return PCRE_ERROR_BADNEWLINE;
                   5747:   }
                   5748: 
                   5749: /* Handle different types of newline. The three bits give eight cases. If
                   5750: nothing is set at run time, whatever was used at compile time applies. */
                   5751: 
                   5752: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   5753:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   5754:   {
                   5755:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   5756:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   5757:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   5758:   case PCRE_NEWLINE_CR+
                   5759:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   5760:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   5761:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   5762:   default: return PCRE_ERROR_BADNEWLINE;
                   5763:   }
                   5764: 
                   5765: if (newline == -2)
                   5766:   {
                   5767:   md->nltype = NLTYPE_ANYCRLF;
                   5768:   }
                   5769: else if (newline < 0)
                   5770:   {
                   5771:   md->nltype = NLTYPE_ANY;
                   5772:   }
                   5773: else
                   5774:   {
                   5775:   md->nltype = NLTYPE_FIXED;
                   5776:   if (newline > 255)
                   5777:     {
                   5778:     md->nllen = 2;
                   5779:     md->nl[0] = (newline >> 8) & 255;
                   5780:     md->nl[1] = newline & 255;
                   5781:     }
                   5782:   else
                   5783:     {
                   5784:     md->nllen = 1;
                   5785:     md->nl[0] = newline;
                   5786:     }
                   5787:   }
                   5788: 
                   5789: /* Partial matching was originally supported only for a restricted set of
                   5790: regexes; from release 8.00 there are no restrictions, but the bits are still
                   5791: defined (though never set). So there's no harm in leaving this code. */
                   5792: 
                   5793: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   5794:   return PCRE_ERROR_BADPARTIAL;
                   5795: 
                   5796: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
                   5797: back the character offset. */
                   5798: 
                   5799: #ifdef SUPPORT_UTF8
                   5800: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   5801:   {
                   5802:   int tb;
                   5803:   if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
                   5804:     return (tb == length && md->partial > 1)?
                   5805:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
                   5806:   if (start_offset > 0 && start_offset < length)
                   5807:     {
                   5808:     tb = ((USPTR)subject)[start_offset] & 0xc0;
                   5809:     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
                   5810:     }
                   5811:   }
                   5812: #endif
                   5813: 
                   5814: /* The ims options can vary during the matching as a result of the presence
                   5815: of (?ims) items in the pattern. They are kept in a local variable so that
                   5816: restoring at the exit of a group is easy. */
                   5817: 
                   5818: ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
                   5819: 
                   5820: /* If the expression has got more back references than the offsets supplied can
                   5821: hold, we get a temporary chunk of working store to use during the matching.
                   5822: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   5823: of 3. */
                   5824: 
                   5825: ocount = offsetcount - (offsetcount % 3);
                   5826: 
                   5827: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   5828:   {
                   5829:   ocount = re->top_backref * 3 + 3;
                   5830:   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
                   5831:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   5832:   using_temporary_offsets = TRUE;
                   5833:   DPRINTF(("Got memory to hold back references\n"));
                   5834:   }
                   5835: else md->offset_vector = offsets;
                   5836: 
                   5837: md->offset_end = ocount;
                   5838: md->offset_max = (2*ocount)/3;
                   5839: md->offset_overflow = FALSE;
                   5840: md->capture_last = -1;
                   5841: 
                   5842: /* Compute the minimum number of offsets that we need to reset each time. Doing
                   5843: this makes a huge difference to execution time when there aren't many brackets
                   5844: in the pattern. */
                   5845: 
                   5846: resetcount = 2 + re->top_bracket * 2;
                   5847: if (resetcount > offsetcount) resetcount = ocount;
                   5848: 
                   5849: /* Reset the working variable associated with each extraction. These should
                   5850: never be used unless previously set, but they get saved and restored, and so we
                   5851: initialize them to avoid reading uninitialized locations. */
                   5852: 
                   5853: if (md->offset_vector != NULL)
                   5854:   {
                   5855:   register int *iptr = md->offset_vector + ocount;
                   5856:   register int *iend = iptr - resetcount/2 + 1;
                   5857:   while (--iptr >= iend) *iptr = -1;
                   5858:   }
                   5859: 
                   5860: /* Set up the first character to match, if available. The first_byte value is
                   5861: never set for an anchored regular expression, but the anchoring may be forced
                   5862: at run time, so we have to test for anchoring. The first char may be unset for
                   5863: an unanchored pattern, of course. If there's no first char and the pattern was
                   5864: studied, there may be a bitmap of possible first characters. */
                   5865: 
                   5866: if (!anchored)
                   5867:   {
                   5868:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   5869:     {
                   5870:     first_byte = re->first_byte & 255;
                   5871:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   5872:       first_byte = md->lcc[first_byte];
                   5873:     }
                   5874:   else
                   5875:     if (!startline && study != NULL &&
                   5876:       (study->flags & PCRE_STUDY_MAPPED) != 0)
                   5877:         start_bits = study->start_bits;
                   5878:   }
                   5879: 
                   5880: /* For anchored or unanchored matches, there may be a "last known required
                   5881: character" set. */
                   5882: 
                   5883: if ((re->flags & PCRE_REQCHSET) != 0)
                   5884:   {
                   5885:   req_byte = re->req_byte & 255;
                   5886:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   5887:   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
                   5888:   }
                   5889: 
                   5890: 
                   5891: /* ==========================================================================*/
                   5892: 
                   5893: /* Loop for handling unanchored repeated matching attempts; for anchored regexs
                   5894: the loop runs just once. */
                   5895: 
                   5896: for(;;)
                   5897:   {
                   5898:   USPTR save_end_subject = end_subject;
                   5899:   USPTR new_start_match;
                   5900: 
                   5901:   /* Reset the maximum number of extractions we might see. */
                   5902: 
                   5903:   if (md->offset_vector != NULL)
                   5904:     {
                   5905:     register int *iptr = md->offset_vector;
                   5906:     register int *iend = iptr + resetcount;
                   5907:     while (iptr < iend) *iptr++ = -1;
                   5908:     }
                   5909: 
                   5910:   /* If firstline is TRUE, the start of the match is constrained to the first
                   5911:   line of a multiline string. That is, the match must be before or at the first
                   5912:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   5913:   scanning at a newline. If the match fails at the newline, later code breaks
                   5914:   this loop. */
                   5915: 
                   5916:   if (firstline)
                   5917:     {
                   5918:     USPTR t = start_match;
                   5919: #ifdef SUPPORT_UTF8
                   5920:     if (utf8)
                   5921:       {
                   5922:       while (t < md->end_subject && !IS_NEWLINE(t))
                   5923:         {
                   5924:         t++;
                   5925:         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
                   5926:         }
                   5927:       }
                   5928:     else
                   5929: #endif
                   5930:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   5931:     end_subject = t;
                   5932:     }
                   5933: 
                   5934:   /* There are some optimizations that avoid running the match if a known
                   5935:   starting point is not found, or if a known later character is not present.
                   5936:   However, there is an option that disables these, for testing and for ensuring
                   5937:   that all callouts do actually occur. The option can be set in the regex by
                   5938:   (*NO_START_OPT) or passed in match-time options. */
                   5939: 
                   5940:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   5941:     {
                   5942:     /* Advance to a unique first byte if there is one. */
                   5943: 
                   5944:     if (first_byte >= 0)
                   5945:       {
                   5946:       if (first_byte_caseless)
                   5947:         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
                   5948:           start_match++;
                   5949:       else
                   5950:         while (start_match < end_subject && *start_match != first_byte)
                   5951:           start_match++;
                   5952:       }
                   5953: 
                   5954:     /* Or to just after a linebreak for a multiline match */
                   5955: 
                   5956:     else if (startline)
                   5957:       {
                   5958:       if (start_match > md->start_subject + start_offset)
                   5959:         {
                   5960: #ifdef SUPPORT_UTF8
                   5961:         if (utf8)
                   5962:           {
                   5963:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   5964:             {
                   5965:             start_match++;
                   5966:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   5967:               start_match++;
                   5968:             }
                   5969:           }
                   5970:         else
                   5971: #endif
                   5972:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   5973:           start_match++;
                   5974: 
                   5975:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   5976:         and we are now at a LF, advance the match position by one more character.
                   5977:         */
                   5978: 
                   5979:         if (start_match[-1] == CHAR_CR &&
                   5980:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   5981:              start_match < end_subject &&
                   5982:              *start_match == CHAR_NL)
                   5983:           start_match++;
                   5984:         }
                   5985:       }
                   5986: 
                   5987:     /* Or to a non-unique first byte after study */
                   5988: 
                   5989:     else if (start_bits != NULL)
                   5990:       {
                   5991:       while (start_match < end_subject)
                   5992:         {
                   5993:         register unsigned int c = *start_match;
                   5994:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   5995:           {
                   5996:           start_match++;
                   5997: #ifdef SUPPORT_UTF8
                   5998:           if (utf8)
                   5999:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   6000:               start_match++;
                   6001: #endif
                   6002:           }
                   6003:         else break;
                   6004:         }
                   6005:       }
                   6006:     }   /* Starting optimizations */
                   6007: 
                   6008:   /* Restore fudged end_subject */
                   6009: 
                   6010:   end_subject = save_end_subject;
                   6011: 
                   6012:   /* The following two optimizations are disabled for partial matching or if
                   6013:   disabling is explicitly requested. */
                   6014: 
                   6015:   if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
                   6016:     {
                   6017:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6018:     only a lower bound; there may be no string of exactly that length that
                   6019:     matches the pattern. Although the value is, strictly, in characters, we
                   6020:     treat it as bytes to avoid spending too much time in this optimization. */
                   6021: 
                   6022:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6023:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6024:       {
                   6025:       rc = MATCH_NOMATCH;
                   6026:       break;
                   6027:       }
                   6028: 
                   6029:     /* If req_byte is set, we know that that character must appear in the
                   6030:     subject for the match to succeed. If the first character is set, req_byte
                   6031:     must be later in the subject; otherwise the test starts at the match point.
                   6032:     This optimization can save a huge amount of backtracking in patterns with
                   6033:     nested unlimited repeats that aren't going to match. Writing separate code
                   6034:     for cased/caseless versions makes it go faster, as does using an
                   6035:     autoincrement and backing off on a match.
                   6036: 
                   6037:     HOWEVER: when the subject string is very, very long, searching to its end
                   6038:     can take a long time, and give bad performance on quite ordinary patterns.
                   6039:     This showed up when somebody was matching something like /^\d+C/ on a
                   6040:     32-megabyte string... so we don't do this when the string is sufficiently
                   6041:     long. */
                   6042: 
                   6043:     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
                   6044:       {
                   6045:       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
                   6046: 
                   6047:       /* We don't need to repeat the search if we haven't yet reached the
                   6048:       place we found it at last time. */
                   6049: 
                   6050:       if (p > req_byte_ptr)
                   6051:         {
                   6052:         if (req_byte_caseless)
                   6053:           {
                   6054:           while (p < end_subject)
                   6055:             {
                   6056:             register int pp = *p++;
                   6057:             if (pp == req_byte || pp == req_byte2) { p--; break; }
                   6058:             }
                   6059:           }
                   6060:         else
                   6061:           {
                   6062:           while (p < end_subject)
                   6063:             {
                   6064:             if (*p++ == req_byte) { p--; break; }
                   6065:             }
                   6066:           }
                   6067: 
                   6068:         /* If we can't find the required character, break the matching loop,
                   6069:         forcing a match failure. */
                   6070: 
                   6071:         if (p >= end_subject)
                   6072:           {
                   6073:           rc = MATCH_NOMATCH;
                   6074:           break;
                   6075:           }
                   6076: 
                   6077:         /* If we have found the required character, save the point where we
                   6078:         found it, so that we don't search again next time round the loop if
                   6079:         the start hasn't passed this character yet. */
                   6080: 
                   6081:         req_byte_ptr = p;
                   6082:         }
                   6083:       }
                   6084:     }
                   6085: 
                   6086: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6087:   printf(">>>> Match against: ");
                   6088:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6089:   printf("\n");
                   6090: #endif
                   6091: 
                   6092:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6093:   first starting point for which a partial match was found. */
                   6094: 
                   6095:   md->start_match_ptr = start_match;
                   6096:   md->start_used_ptr = start_match;
                   6097:   md->match_call_count = 0;
                   6098:   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
                   6099:     0, 0);
                   6100:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
                   6101: 
                   6102:   switch(rc)
                   6103:     {
                   6104:     /* SKIP passes back the next starting point explicitly, but if it is the
                   6105:     same as the match we have just done, treat it as NOMATCH. */
                   6106: 
                   6107:     case MATCH_SKIP:
                   6108:     if (md->start_match_ptr != start_match)
                   6109:       {
                   6110:       new_start_match = md->start_match_ptr;
                   6111:       break;
                   6112:       }
                   6113:     /* Fall through */
                   6114: 
                   6115:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
                   6116:     the SKIP's arg was not found. We also treat this as NOMATCH. */
                   6117: 
                   6118:     case MATCH_SKIP_ARG:
                   6119:     /* Fall through */
                   6120: 
                   6121:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   6122:     exactly like PRUNE. */
                   6123: 
                   6124:     case MATCH_NOMATCH:
                   6125:     case MATCH_PRUNE:
                   6126:     case MATCH_THEN:
                   6127:     new_start_match = start_match + 1;
                   6128: #ifdef SUPPORT_UTF8
                   6129:     if (utf8)
                   6130:       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
                   6131:         new_start_match++;
                   6132: #endif
                   6133:     break;
                   6134: 
                   6135:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6136: 
                   6137:     case MATCH_COMMIT:
                   6138:     rc = MATCH_NOMATCH;
                   6139:     goto ENDLOOP;
                   6140: 
                   6141:     /* Any other return is either a match, or some kind of error. */
                   6142: 
                   6143:     default:
                   6144:     goto ENDLOOP;
                   6145:     }
                   6146: 
                   6147:   /* Control reaches here for the various types of "no match at this point"
                   6148:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6149: 
                   6150:   rc = MATCH_NOMATCH;
                   6151: 
                   6152:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6153:   newline in the subject (though it may continue over the newline). Therefore,
                   6154:   if we have just failed to match, starting at a newline, do not continue. */
                   6155: 
                   6156:   if (firstline && IS_NEWLINE(start_match)) break;
                   6157: 
                   6158:   /* Advance to new matching position */
                   6159: 
                   6160:   start_match = new_start_match;
                   6161: 
                   6162:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6163:   the subject. */
                   6164: 
                   6165:   if (anchored || start_match > end_subject) break;
                   6166: 
                   6167:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6168:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   6169:   or ANY or ANYCRLF, advance the match position by one more character. */
                   6170: 
                   6171:   if (start_match[-1] == CHAR_CR &&
                   6172:       start_match < end_subject &&
                   6173:       *start_match == CHAR_NL &&
                   6174:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   6175:         (md->nltype == NLTYPE_ANY ||
                   6176:          md->nltype == NLTYPE_ANYCRLF ||
                   6177:          md->nllen == 2))
                   6178:     start_match++;
                   6179: 
                   6180:   md->mark = NULL;   /* Reset for start of next match attempt */
                   6181:   }                  /* End of for(;;) "bumpalong" loop */
                   6182: 
                   6183: /* ==========================================================================*/
                   6184: 
                   6185: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   6186: conditions is true:
                   6187: 
                   6188: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   6189: 
                   6190: (2) We are past the end of the subject;
                   6191: 
                   6192: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   6193:     this option requests that a match occur at or before the first newline in
                   6194:     the subject.
                   6195: 
                   6196: When we have a match and the offset vector is big enough to deal with any
                   6197: backreferences, captured substring offsets will already be set up. In the case
                   6198: where we had to get some local store to hold offsets for backreference
                   6199: processing, copy those that we can. In this case there need not be overflow if
                   6200: certain parts of the pattern were not used, even though there are more
                   6201: capturing parentheses than vector slots. */
                   6202: 
                   6203: ENDLOOP:
                   6204: 
                   6205: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
                   6206:   {
                   6207:   if (using_temporary_offsets)
                   6208:     {
                   6209:     if (offsetcount >= 4)
                   6210:       {
                   6211:       memcpy(offsets + 2, md->offset_vector + 2,
                   6212:         (offsetcount - 2) * sizeof(int));
                   6213:       DPRINTF(("Copied offsets from temporary memory\n"));
                   6214:       }
                   6215:     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
                   6216:     DPRINTF(("Freeing temporary memory\n"));
                   6217:     (pcre_free)(md->offset_vector);
                   6218:     }
                   6219: 
                   6220:   /* Set the return code to the number of captured strings, or 0 if there are
                   6221:   too many to fit into the vector. */
                   6222: 
                   6223:   rc = md->offset_overflow? 0 : md->end_offset_top/2;
                   6224: 
                   6225:   /* If there is space, set up the whole thing as substring 0. The value of
                   6226:   md->start_match_ptr might be modified if \K was encountered on the
                   6227:   successful matching path. */
                   6228: 
                   6229:   if (offsetcount < 2) rc = 0; else
                   6230:     {
                   6231:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   6232:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
                   6233:     }
                   6234: 
                   6235:   DPRINTF((">>>> returning %d\n", rc));
                   6236:   goto RETURN_MARK;
                   6237:   }
                   6238: 
                   6239: /* Control gets here if there has been an error, or if the overall match
                   6240: attempt has failed at all permitted starting positions. */
                   6241: 
                   6242: if (using_temporary_offsets)
                   6243:   {
                   6244:   DPRINTF(("Freeing temporary memory\n"));
                   6245:   (pcre_free)(md->offset_vector);
                   6246:   }
                   6247: 
                   6248: /* For anything other than nomatch or partial match, just return the code. */
                   6249: 
                   6250: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
                   6251:   {
                   6252:   DPRINTF((">>>> error: returning %d\n", rc));
                   6253:   return rc;
                   6254:   }
                   6255: 
                   6256: /* Handle partial matches - disable any mark data */
                   6257: 
                   6258: if (start_partial != NULL)
                   6259:   {
                   6260:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   6261:   md->mark = NULL;
                   6262:   if (offsetcount > 1)
                   6263:     {
                   6264:     offsets[0] = (int)(start_partial - (USPTR)subject);
                   6265:     offsets[1] = (int)(end_subject - (USPTR)subject);
                   6266:     }
                   6267:   rc = PCRE_ERROR_PARTIAL;
                   6268:   }
                   6269: 
                   6270: /* This is the classic nomatch case */
                   6271: 
                   6272: else
                   6273:   {
                   6274:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   6275:   rc = PCRE_ERROR_NOMATCH;
                   6276:   }
                   6277: 
                   6278: /* Return the MARK data if it has been requested. */
                   6279: 
                   6280: RETURN_MARK:
                   6281: 
                   6282: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
                   6283:   *(extra_data->mark) = (unsigned char *)(md->mark);
                   6284: return rc;
                   6285: }
                   6286: 
                   6287: /* End of pcre_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>