Annotation of embedaddon/pcre/pcre_exec.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2011 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                     49: #define NLBLOCK md             /* Block containing newline information */
                     50: #define PSSTART start_subject  /* Field containing processed string start */
                     51: #define PSEND   end_subject    /* Field containing processed string end */
                     52: 
                     53: #include "pcre_internal.h"
                     54: 
                     55: /* Undefine some potentially clashing cpp symbols */
                     56: 
                     57: #undef min
                     58: #undef max
                     59: 
                     60: /* Values for setting in md->match_function_type to indicate two special types
                     61: of call to match(). We do it this way to save on using another stack variable,
                     62: as stack usage is to be discouraged. */
                     63: 
                     64: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
                     65: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
                     66: 
                     67: /* Non-error returns from the match() function. Error returns are externally
                     68: defined PCRE_ERROR_xxx codes, which are all negative. */
                     69: 
                     70: #define MATCH_MATCH        1    /* Returned by match() when it matched */
                     71: #define MATCH_NOMATCH      0    /* Returned by match() when it failed to match */
                     72: 
                     73: /* Special internal returns from the match() function. Make them sufficiently
                     74: negative to avoid the external error codes, which share the same int return. */
                     75: 
                     76: #define MATCH_ACCEPT       (-999)
                     77: #define MATCH_COMMIT       (-998)
                     78: #define MATCH_KETRPOS      (-997)
                     79: #define MATCH_ONCE         (-996)
                     80: #define MATCH_PRUNE        (-995)
                     81: #define MATCH_SKIP         (-994)
                     82: #define MATCH_SKIP_ARG     (-993)
                     83: #define MATCH_THEN         (-992)
                     84: 
                     85: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     86: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     87: because the offset vector is always a multiple of 3 long. */
                     88: 
                     89: #define REC_STACK_SAVE_MAX 30   /* 30 ints = ten groups of 3 */
                     90: 
                     91: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     92: 
                     93: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* Minimum counts; same index order as rep_max */
                     94: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* Maximum counts; index meaning not shown here -- TODO confirm at the use sites */
                     95: 
                     96: 
                     97: 
                     98: #ifdef PCRE_DEBUG
                     99: /*************************************************
                    100: *        Debugging function to print chars       *
                    101: *************************************************/
                    102: 
                    103: /* Print a sequence of chars in printable format, stopping at the end of the
                    104: subject if requested.
                    105: 
                    106: Arguments:
                    107:   p           points to characters
                    108:   length      number to print
                    109:   is_subject  TRUE if printing from within md->start_subject
                    110:   md          pointer to matching data block, if is_subject is TRUE
                    111: 
                    112: Returns:     nothing
                    113: */
                    114: 
                    115: static void
                    116: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
                    117: {
                    118: unsigned int c;   /* Character currently being printed */
                    119: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* Clamp to the end of the subject */
                    120: while (length-- > 0)
                    121:   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);  /* Non-printable values are shown as two-digit hex escapes */
                    122: }
                    123: #endif
                    124: 
                    125: 
                    126: 
                    127: /*************************************************
                    128: *          Match a back-reference                *
                    129: *************************************************/
                    130: 
                    131: /* Match the subject at eptr against a previously captured substring. If the
                    132: back reference hasn't been set, the length passed is normally negative, so the
                    133: match always fails; in JavaScript compatibility mode the length passed is zero.
                    134: In caseless UTF-8 mode, subject and reference byte counts may differ.
                    135: 
                    136: Arguments:
                    137:   offset      index into the offset vector
                    138:   eptr        pointer into the subject
                    139:   length      length of reference to be matched (number of bytes)
                    140:   md          points to match data block
                    141:   caseless    TRUE if caseless
                    142: 
                    143: Returns:      < 0 if not matched, otherwise the number of subject bytes matched
                    144: */
                    145: 
                    146: static int
                    147: match_ref(int offset, register USPTR eptr, int length, match_data *md,
                    148:   BOOL caseless)
                    149: {
                    150: USPTR eptr_start = eptr;   /* Kept so the number of bytes consumed can be returned */
                    151: register USPTR p = md->start_subject + md->offset_vector[offset];   /* Start of the reference text */
                    152: 
                    153: #ifdef PCRE_DEBUG
                    154: if (eptr >= md->end_subject)
                    155:   printf("matching subject <null>");
                    156: else
                    157:   {
                    158:   printf("matching subject ");
                    159:   pchars(eptr, length, TRUE, md);
                    160:   }
                    161: printf(" against backref ");
                    162: pchars(p, length, FALSE, md);
                    163: printf("\n");
                    164: #endif
                    165: 
                    166: /* Always fail if reference not set (and not JavaScript compatible). */
                    167: 
                    168: if (length < 0) return -1;
                    169: 
                    170: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    171: properly if Unicode properties are supported. Otherwise, we can check only
                    172: ASCII characters. */
                    173: 
                    174: if (caseless)
                    175:   {
                    176: #ifdef SUPPORT_UTF8
                    177: #ifdef SUPPORT_UCP
                    178:   if (md->utf8)
                    179:     {
                    180:     /* Match characters up to the end of the reference. NOTE: the number of
                    181:     bytes matched may differ, because there are some characters whose upper and
                    182:     lower case versions code as different numbers of bytes. For example, U+023A
                    183:     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
                    184:     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
                    185:     the latter. It is important, therefore, to check the length along the
                    186:     reference, not along the subject (earlier code did this wrong). */
                    187: 
                    188:     USPTR endptr = p + length;
                    189:     while (p < endptr)
                    190:       {
                    191:       int c, d;
                    192:       if (eptr >= md->end_subject) return -1;   /* Subject exhausted before the reference */
                    193:       GETCHARINC(c, eptr);
                    194:       GETCHARINC(d, p);
                    195:       if (c != d && c != UCD_OTHERCASE(d)) return -1;
                    196:       }
                    197:     }
                    198:   else
                    199: #endif
                    200: #endif
                    201: 
                    202:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    203:   is no UCP support. */
                    204:     {
                    205:     if (length > md->end_subject - eptr) return -1;   /* Compare lengths, so no pointer beyond the subject end is formed */
                    206:     while (length-- > 0)
                    207:       { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
                    208:     }
                    209:   }
                    210: 
                    211: /* In the caseful case, we can just compare the bytes, whether or not we
                    212: are in UTF-8 mode. */
                    213: 
                    214: else
                    215:   {
                    216:   if (length > md->end_subject - eptr) return -1;   /* Compare lengths, so no pointer beyond the subject end is formed */
                    217:   while (length-- > 0) if (*p++ != *eptr++) return -1;
                    218:   }
                    219: 
                    220: return (int)(eptr - eptr_start);
                    221: }
                    222: 
                    223: 
                    224: 
                    225: /***************************************************************************
                    226: ****************************************************************************
                    227:                    RECURSION IN THE match() FUNCTION
                    228: 
                    229: The match() function is highly recursive, though not every recursive call
                    230: increases the recursive depth. Nevertheless, some regular expressions can cause
                    231: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    232: itself recursively. This uses the stack for saving everything that has to be
                    233: saved for a recursive call. On Unix, the stack can be large, and this works
                    234: fine.
                    235: 
                    236: It turns out that on some non-Unix-like systems there are problems with
                    237: programs that use a lot of stack. (This despite the fact that every last chip
                    238: has oodles of memory these days, and techniques for extending the stack have
                    239: been known for decades.) So....
                    240: 
                    241: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    242: calls by keeping local variables that need to be preserved in blocks of memory
                    243: obtained from malloc() instead of on the stack. Macros are used to
                    244: achieve this so that the actual code doesn't look very different to what it
                    245: always used to.
                    246: 
                    247: The original heap-recursive code used longjmp(). However, it seems that this
                    248: can be very slow on some operating systems. Following a suggestion from Stan
                    249: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    250: provide a unique number for each call to RMATCH. There is no way of generating
                    251: a sequence of numbers at compile time in C. I have given them names, to make
                    252: them stand out more clearly.
                    253: 
                    254: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    255: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    256: tests. Furthermore, not using longjmp() means that local dynamic variables
                    257: don't have indeterminate values; this has meant that the frame size can be
                    258: reduced because the result can be "passed back" by straight setting of the
                    259: variable instead of being passed in the frame.
                    260: ****************************************************************************
                    261: ***************************************************************************/
                    262: 
                    263: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    264: below must be updated in sync.  */
                    265: 
                    266: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,   /* Numbering starts at 1 */
                    267:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    268:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    269:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    270:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    271:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
                    272:        RM61,  RM62, RM63, RM64, RM65, RM66 };   /* Each value is passed as the "rw" argument of one RMATCH call */
                    273: 
                    274: /* These versions of the macros use the stack, as normal. There are debugging
                    275: versions and production versions. Note that the "rw" argument of RMATCH isn't
                    276: actually used in this definition. */
                    277: 
                    278: #ifndef NO_RECURSE
                    279: #define REGISTER register
                    280: 
                    281: #ifdef PCRE_DEBUG   /* Debugging versions: trace each call and return */
                    282: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    283:   { \
                    284:   printf("match() called in line %d\n", __LINE__); \
                    285:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* Result is left in rrc; rw is not used */ \
                    286:   printf("to line %d\n", __LINE__); \
                    287:   }
                    288: #define RRETURN(ra) \
                    289:   { \
                    290:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    291:   return ra; \
                    292:   }
                    293: #else               /* Production versions: a plain recursive call and return */
                    294: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    295:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
                    296: #define RRETURN(ra) return ra
                    297: #endif              /* PCRE_DEBUG */
                    298: 
                    299: #else
                    300: 
                    301: 
                    302: /* These versions of the macros manage a private stack on the heap. Note that
                    303: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    304: argument of match(), which never changes. */
                    305: 
                    306: #define REGISTER
                    307: 
                    308: #define RMATCH(ra,rb,rc,rd,re,rw)\
                    309:   {\
                    310:   heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
                    311:   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
                    312:   frame->Xwhere = rw; /* Remember, in the calling frame, which call site this is */ \
                    313:   newframe->Xeptr = ra; /* The new frame receives the "arguments" of the call */\
                    314:   newframe->Xecode = rb;\
                    315:   newframe->Xmstart = mstart;\
                    316:   newframe->Xoffset_top = rc;\
                    317:   newframe->Xeptrb = re;\
                    318:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    319:   newframe->Xprevframe = frame; /* Link back to the calling frame */\
                    320:   frame = newframe; /* The new frame becomes the current one */\
                    321:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    322:   goto HEAP_RECURSE;\
                    323:   L_##rw: /* Label for this call site; presumably reached from HEAP_RETURN via Xwhere -- confirm */\
                    324:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    325:   }
                    326: 
                    327: #define RRETURN(ra)\
                    328:   {\
                    329:   heapframe *oldframe = frame;\
                    330:   frame = oldframe->Xprevframe; /* Step back to the calling frame */\
                    331:   (pcre_stack_free)(oldframe); /* Release the frame being returned from */\
                    332:   if (frame != NULL)\
                    333:     {\
                    334:     rrc = ra; /* Hand the result back through rrc */\
                    335:     goto HEAP_RETURN;\
                    336:     }\
                    337:   return ra; /* No calling frame: this was the top level, so really return */\
                    338:   }
                    339: 
                    340: 
                    341: /* Structure for remembering the local variables in a private frame */
                    342: 
                    343: typedef struct heapframe {
                    344:   struct heapframe *Xprevframe;   /* Calling frame; NULL marks the top level */
                    345: 
                    346:   /* Function arguments that may change */
                    347: 
                    348:   USPTR Xeptr;
                    349:   const uschar *Xecode;
                    350:   USPTR Xmstart;
                    351:   int Xoffset_top;
                    352:   eptrblock *Xeptrb;
                    353:   unsigned int Xrdepth;           /* RMATCH sets this to the calling frame's value plus one */
                    354: 
                    355:   /* Function local variables */
                    356: 
                    357:   USPTR Xcallpat;
                    358: #ifdef SUPPORT_UTF8
                    359:   USPTR Xcharptr;
                    360: #endif
                    361:   USPTR Xdata;
                    362:   USPTR Xnext;
                    363:   USPTR Xpp;
                    364:   USPTR Xprev;
                    365:   USPTR Xsaved_eptr;
                    366: 
                    367:   recursion_info Xnew_recursive;
                    368: 
                    369:   BOOL Xcur_is_word;
                    370:   BOOL Xcondition;
                    371:   BOOL Xprev_is_word;
                    372: 
                    373: #ifdef SUPPORT_UCP
                    374:   int Xprop_type;
                    375:   int Xprop_value;
                    376:   int Xprop_fail_result;
                    377:   int Xoclength;
                    378:   uschar Xocchars[8];
                    379: #endif
                    380: 
                    381:   int Xcodelink;
                    382:   int Xctype;
                    383:   unsigned int Xfc;
                    384:   int Xfi;
                    385:   int Xlength;
                    386:   int Xmax;
                    387:   int Xmin;
                    388:   int Xnumber;
                    389:   int Xoffset;
                    390:   int Xop;
                    391:   int Xsave_capture_last;
                    392:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    393:   int Xstacksave[REC_STACK_SAVE_MAX];   /* Sized by REC_STACK_SAVE_MAX */
                    394: 
                    395:   eptrblock Xnewptrb;
                    396: 
                    397:   /* Where to jump back to */
                    398: 
                    399:   int Xwhere;                     /* The "rw" value stored by RMATCH before it switches frames */
                    400: 
                    401: } heapframe;
                    402: 
                    403: #endif
                    404: 
                    405: 
                    406: /***************************************************************************
                    407: ***************************************************************************/
                    408: 
                    409: 
                    410: 
                    411: /*************************************************
                    412: *         Match from current position            *
                    413: *************************************************/
                    414: 
                    415: /* This function is called recursively in many circumstances. Whenever it
                    416: returns a negative (error) response, the outer incarnation must also return the
                    417: same response. */
                    418: 
                    419: /* These macros pack up tests that are used for partial matching, and which
                    420: appear several times in the code. We set the "hit end" flag if the pointer is
                    421: at the end of the subject and also past the start of the subject (i.e.
                    422: something has been matched). For hard partial matching, we then return
                    423: immediately. The second one is used when we already know we are past the end of
                    424: the subject. */
                    425: 
                    426: #define CHECK_PARTIAL()\
                    427:   if (md->partial != 0 && eptr >= md->end_subject && /* At the end of the subject... */ \
                    428:       eptr > md->start_used_ptr) /* ...and past md->start_used_ptr */ \
                    429:     { \
                    430:     md->hitend = TRUE; \
                    431:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* Hard partial matching returns immediately */ \
                    432:     }
                    433: 
                    434: #define SCHECK_PARTIAL()\
                    435:   if (md->partial != 0 && eptr > md->start_used_ptr) /* No end-of-subject test: the caller already knows */ \
                    436:     { \
                    437:     md->hitend = TRUE; \
                    438:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* Hard partial matching returns immediately */ \
                    439:     }
                    440: 
                    441: 
                    442: /* Performance note: It might be tempting to extract commonly used fields from
                    443: the md structure (e.g. utf8, end_subject) into individual variables to improve
                    444: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    445: made performance worse.
                    446: 
                    447: Arguments:
                    448:    eptr        pointer to current character in subject
                    449:    ecode       pointer to current position in compiled code
                    450:    mstart      pointer to the current match start position (can be modified
                    451:                  by encountering \K)
                    452:    offset_top  current top pointer
                    453:    md          pointer to "static" info for the match
                    454:    eptrb       pointer to chain of blocks containing eptr at start of
                    455:                  brackets - for testing for empty matches
                    456:    rdepth      the recursion depth
                    457: 
                    458: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    459:                MATCH_NOMATCH if failed to match  )
                    460:                a negative MATCH_xxx value for PRUNE, SKIP, etc
                    461:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    462:                  (e.g. stopped by repeated call or recursion limit)
                    463: */
                    464: 
                    465: static int
                    466: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
                    467:   int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
                    468: {
                    469: /* These variables do not need to be preserved over recursion in this function,
                    470: so they can be ordinary variables in all cases. Mark some of them with
                    471: "register" because they are used a lot in loops. */
                    472: 
                    473: register int  rrc;         /* Returns from recursive calls */
                    474: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    475: register unsigned int c;   /* Character values not kept over RMATCH() calls */
                    476: register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
                    477: 
                    478: BOOL minimize, possessive; /* Quantifier options */
                    479: BOOL caseless;
                    480: int condcode;
                    481: 
                    482: /* When recursion is not being used, all "local" variables that have to be
                    483: preserved over calls to RMATCH() are part of a "frame" which is obtained from
                    484: heap storage. Set up the top-level frame here; others are obtained from the
                    485: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
                    486: 
                    487: #ifdef NO_RECURSE
                    488: heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
                    489: if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                    490: frame->Xprevframe = NULL;            /* Marks the top level */
                    491: 
                    492: /* Copy in the original argument variables */
                    493: 
                    494: frame->Xeptr = eptr;
                    495: frame->Xecode = ecode;
                    496: frame->Xmstart = mstart;
                    497: frame->Xoffset_top = offset_top;
                    498: frame->Xeptrb = eptrb;
                    499: frame->Xrdepth = rdepth;
                    500: 
                    501: /* This is where control jumps back to in order to effect "recursion" */
                    502: 
                    503: HEAP_RECURSE:
                    504: 
                    505: /* Macros make the argument variables come from the current frame */
                    506: 
                    507: #define eptr               frame->Xeptr
                    508: #define ecode              frame->Xecode
                    509: #define mstart             frame->Xmstart
                    510: #define offset_top         frame->Xoffset_top
                    511: #define eptrb              frame->Xeptrb
                    512: #define rdepth             frame->Xrdepth
                    513: 
                    514: /* Ditto for the local variables */
                    515: 
                    516: #ifdef SUPPORT_UTF8
                    517: #define charptr            frame->Xcharptr
                    518: #endif
                    519: #define callpat            frame->Xcallpat
                    520: #define codelink           frame->Xcodelink
                    521: #define data               frame->Xdata
                    522: #define next               frame->Xnext
                    523: #define pp                 frame->Xpp
                    524: #define prev               frame->Xprev
                    525: #define saved_eptr         frame->Xsaved_eptr
                    526: 
                    527: #define new_recursive      frame->Xnew_recursive
                    528: 
                    529: #define cur_is_word        frame->Xcur_is_word
                    530: #define condition          frame->Xcondition
                    531: #define prev_is_word       frame->Xprev_is_word
                    532: 
                    533: #ifdef SUPPORT_UCP
                    534: #define prop_type          frame->Xprop_type
                    535: #define prop_value         frame->Xprop_value
                    536: #define prop_fail_result   frame->Xprop_fail_result
                    537: #define oclength           frame->Xoclength
                    538: #define occhars            frame->Xocchars
                    539: #endif
                    540: 
                    541: #define ctype              frame->Xctype
                    542: #define fc                 frame->Xfc
                    543: #define fi                 frame->Xfi
                    544: #define length             frame->Xlength
                    545: #define max                frame->Xmax
                    546: #define min                frame->Xmin
                    547: #define number             frame->Xnumber
                    548: #define offset             frame->Xoffset
                    549: #define op                 frame->Xop
                    550: #define save_capture_last  frame->Xsave_capture_last
                    551: #define save_offset1       frame->Xsave_offset1
                    552: #define save_offset2       frame->Xsave_offset2
                    553: #define save_offset3       frame->Xsave_offset3
                    554: #define stacksave          frame->Xstacksave
                    555: 
                    556: #define newptrb            frame->Xnewptrb
                    557: 
                    558: /* When recursion is being used, local variables are allocated on the stack and
                    559: get preserved during recursion in the normal way. In this environment, fi and
                    560: i, and fc and c, can be the same variables. */
                    561: 
                    562: #else         /* NO_RECURSE not defined */
                    563: #define fi i
                    564: #define fc c
                    565: 
                    566: /* Many of the following variables are used only in small blocks of the code.
                    567: My normal style of coding would have declared them within each of those blocks.
                    568: However, in order to accommodate the version of this code that uses an external
                    569: "stack" implemented on the heap, it is easier to declare them all here, so the
                    570: declarations can be cut out in a block. The only declarations within blocks
                    571: below are for variables that do not have to be preserved over a recursive call
                    572: to RMATCH(). */
                    573: 
                    574: #ifdef SUPPORT_UTF8
                    575: const uschar *charptr;
                    576: #endif
                    577: const uschar *callpat;
                    578: const uschar *data;
                    579: const uschar *next;
                    580: USPTR         pp;
                    581: const uschar *prev;
                    582: USPTR         saved_eptr;
                    583: 
                    584: recursion_info new_recursive;
                    585: 
                    586: BOOL cur_is_word;
                    587: BOOL condition;
                    588: BOOL prev_is_word;
                    589: 
                    590: #ifdef SUPPORT_UCP
                    591: int prop_type;
                    592: int prop_value;
                    593: int prop_fail_result;
                    594: int oclength;
                    595: uschar occhars[8];
                    596: #endif
                    597: 
                    598: int codelink;
                    599: int ctype;
                    600: int length;
                    601: int max;
                    602: int min;
                    603: int number;
                    604: int offset;
                    605: int op;
                    606: int save_capture_last;
                    607: int save_offset1, save_offset2, save_offset3;
                    608: int stacksave[REC_STACK_SAVE_MAX];
                    609: 
                    610: eptrblock newptrb;
                    611: #endif     /* NO_RECURSE */
                    612: 
                    613: /* To save space on the stack and in the heap frame, I have doubled up on some
                    614: of the local variables that are used only in localised parts of the code, but
                    615: still need to be preserved over recursive calls of match(). These macros define
                    616: the alternative names that are used. */
                    617: 
                    618: #define allow_zero    cur_is_word
                    619: #define cbegroup      condition
                    620: #define code_offset   codelink
                    621: #define condassert    condition
                    622: #define matched_once  prev_is_word
                    623: 
                    624: /* These statements are here to stop the compiler complaining about uninitialized
                    625: variables. */
                    626: 
                    627: #ifdef SUPPORT_UCP
                    628: prop_value = 0;
                    629: prop_fail_result = 0;
                    630: #endif
                    631: 
                    632: 
                    633: /* This label is used for tail recursion, which is used in a few cases even
                    634: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    635: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    636: original patch. */
                    637: 
                    638: TAIL_RECURSE:
                    639: 
                    640: /* OK, now we can get on with the real code of the function. Recursive calls
                    641: are specified by the macro RMATCH and RRETURN is used to return. When
                    642: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    643: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
                    644: defined). However, RMATCH isn't like a function call because it's quite a
                    645: complicated macro. It has to be used in one particular way. This shouldn't,
                    646: however, impact performance when true recursion is being used. */
                    647: 
                    648: #ifdef SUPPORT_UTF8
                    649: utf8 = md->utf8;       /* Local copy of the flag */
                    650: #else
                    651: utf8 = FALSE;
                    652: #endif
                    653: 
                    654: /* First check that we haven't called match() too many times, or that we
                    655: haven't exceeded the recursive call limit. */
                    656: 
                    657: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    658: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    659: 
                    660: /* At the start of a group with an unlimited repeat that may match an empty
                    661: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
                    662: done this way to save having to use another function argument, which would take
                    663: up space on the stack. See also MATCH_CONDASSERT below.
                    664: 
                    665: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
                    666: such remembered pointers, to be checked when we hit the closing ket, in order
                    667: to break infinite loops that match no characters. When match() is called in
                    668: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
                    669: NOT be used with tail recursion, because the memory block that is used is on
                    670: the stack, so a new one may be required for each match(). */
                    671: 
                    672: if (md->match_function_type == MATCH_CBEGROUP)
                    673:   {
                    674:   newptrb.epb_saved_eptr = eptr;
                    675:   newptrb.epb_prev = eptrb;
                    676:   eptrb = &newptrb;
                    677:   md->match_function_type = 0;
                    678:   }
                    679: 
                    680: /* Now start processing the opcodes. */
                    681: 
                    682: for (;;)
                    683:   {
                    684:   minimize = possessive = FALSE;
                    685:   op = *ecode;
                    686: 
                    687:   switch(op)
                    688:     {
                    689:     case OP_MARK:
                    690:     md->nomatch_mark = ecode + 2;
                    691:     md->mark = NULL;    /* In case previously set by assertion */
                    692:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    693:       eptrb, RM55);
                    694:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    695:          md->mark == NULL) md->mark = ecode + 2;
                    696: 
                    697:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    698:     argument, and we must check whether that argument matches this MARK's
                    699:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    700:     variable). If it does match, we reset that variable to the current subject
                    701:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    702:     unaltered. */
                    703: 
                    704:     else if (rrc == MATCH_SKIP_ARG &&
                    705:         strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)
                    706:       {
                    707:       md->start_match_ptr = eptr;
                    708:       RRETURN(MATCH_SKIP);
                    709:       }
                    710:     RRETURN(rrc);
                    711: 
                    712:     case OP_FAIL:
                    713:     RRETURN(MATCH_NOMATCH);
                    714: 
                    715:     /* COMMIT overrides PRUNE, SKIP, and THEN */
                    716: 
                    717:     case OP_COMMIT:
                    718:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    719:       eptrb, RM52);
                    720:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
                    721:         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
                    722:         rrc != MATCH_THEN)
                    723:       RRETURN(rrc);
                    724:     RRETURN(MATCH_COMMIT);
                    725: 
                    726:     /* PRUNE overrides THEN */
                    727: 
                    728:     case OP_PRUNE:
                    729:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    730:       eptrb, RM51);
                    731:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    732:     RRETURN(MATCH_PRUNE);
                    733: 
                    734:     case OP_PRUNE_ARG:
                    735:     md->nomatch_mark = ecode + 2;
                    736:     md->mark = NULL;    /* In case previously set by assertion */
                    737:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    738:       eptrb, RM56);
                    739:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    740:          md->mark == NULL) md->mark = ecode + 2;
                    741:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    742:     RRETURN(MATCH_PRUNE);
                    743: 
                    744:     /* SKIP overrides PRUNE and THEN */
                    745: 
                    746:     case OP_SKIP:
                    747:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    748:       eptrb, RM53);
                    749:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    750:       RRETURN(rrc);
                    751:     md->start_match_ptr = eptr;   /* Pass back current position */
                    752:     RRETURN(MATCH_SKIP);
                    753: 
                    754:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
                    755:     nomatch_mark. There is a flag that disables this opcode when re-matching a
                    756:     pattern that ended with a SKIP for which there was not a matching MARK. */
                    757: 
                    758:     case OP_SKIP_ARG:
                    759:     if (md->ignore_skip_arg)
                    760:       {
                    761:       ecode += _pcre_OP_lengths[*ecode] + ecode[1];
                    762:       break;
                    763:       }
                    764:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    765:       eptrb, RM57);
                    766:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    767:       RRETURN(rrc);
                    768: 
                    769:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    770:     returning the special MATCH_SKIP_ARG return code. This will either be
                    771:     caught by a matching MARK, or get to the top, where it causes a rematch
                    772:     with the md->ignore_skip_arg flag set. */
                    773: 
                    774:     md->start_match_ptr = ecode + 2;
                    775:     RRETURN(MATCH_SKIP_ARG);
                    776: 
                    777:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
                    778:     the branch in which it occurs can be determined. Overload the start of
                    779:     match pointer to do this. */
                    780: 
                    781:     case OP_THEN:
                    782:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    783:       eptrb, RM54);
                    784:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    785:     md->start_match_ptr = ecode;
                    786:     RRETURN(MATCH_THEN);
                    787: 
                    788:     case OP_THEN_ARG:
                    789:     md->nomatch_mark = ecode + 2;
                    790:     md->mark = NULL;    /* In case previously set by assertion */
                    791:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,
                    792:       md, eptrb, RM58);
                    793:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    794:          md->mark == NULL) md->mark = ecode + 2;
                    795:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    796:     md->start_match_ptr = ecode;
                    797:     RRETURN(MATCH_THEN);
                    798: 
                    799:     /* Handle an atomic group that does not contain any capturing parentheses.
                    800:     This can be handled like an assertion. Prior to 8.13, all atomic groups
                    801:     were handled this way. In 8.13, the code was changed as below for ONCE, so
                    802:     that backups pass through the group and thereby reset captured values.
                    803:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
                    804:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
                    805:     less stack intensive way.
                    806: 
                    807:     Check the alternative branches in turn - the matching won't pass the KET
                    808:     for this kind of subpattern. If any one branch matches, we carry on as at
                    809:     the end of a normal bracket, leaving the subject pointer, but resetting
                    810:     the start-of-match value in case it was changed by \K. */
                    811: 
                    812:     case OP_ONCE_NC:
                    813:     prev = ecode;
                    814:     saved_eptr = eptr;
                    815:     do
                    816:       {
                    817:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
                    818:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
                    819:         {
                    820:         mstart = md->start_match_ptr;
                    821:         break;
                    822:         }
                    823:       if (rrc == MATCH_THEN)
                    824:         {
                    825:         next = ecode + GET(ecode,1);
                    826:         if (md->start_match_ptr < next &&
                    827:             (*ecode == OP_ALT || *next == OP_ALT))
                    828:           rrc = MATCH_NOMATCH;
                    829:         }
                    830: 
                    831:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    832:       ecode += GET(ecode,1);
                    833:       }
                    834:     while (*ecode == OP_ALT);
                    835: 
                    836:     /* If we hit the end of the group (which could be repeated), fail */
                    837: 
                    838:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                    839: 
                    840:     /* Continue as from after the group, updating the offsets high water
                    841:     mark, since extracts may have been taken. */
                    842: 
                    843:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                    844: 
                    845:     offset_top = md->end_offset_top;
                    846:     eptr = md->end_match_ptr;
                    847: 
                    848:     /* For a non-repeating ket, just continue at this level. This also
                    849:     happens for a repeating ket if no characters were matched in the group.
                    850:     This is the forcible breaking of infinite loops as implemented in Perl
                    851:     5.005. */
                    852: 
                    853:     if (*ecode == OP_KET || eptr == saved_eptr)
                    854:       {
                    855:       ecode += 1+LINK_SIZE;
                    856:       break;
                    857:       }
                    858: 
                    859:     /* The repeating kets try the rest of the pattern or restart from the
                    860:     preceding bracket, in the appropriate order. The second "call" of match()
                    861:     uses tail recursion, to avoid using another stack frame. */
                    862: 
                    863:     if (*ecode == OP_KETRMIN)
                    864:       {
                    865:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
                    866:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    867:       ecode = prev;
                    868:       goto TAIL_RECURSE;
                    869:       }
                    870:     else  /* OP_KETRMAX */
                    871:       {
                    872:       md->match_function_type = MATCH_CBEGROUP;
                    873:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
                    874:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    875:       ecode += 1 + LINK_SIZE;
                    876:       goto TAIL_RECURSE;
                    877:       }
                    878:     /* Control never gets here */
                    879: 
                    880:     /* Handle a capturing bracket, other than those that are possessive with an
                    881:     unlimited repeat. If there is space in the offset vector, save the current
                    882:     subject position in the working slot at the top of the vector. We mustn't
                    883:     change the current values of the data slot, because they may be set from a
                    884:     previous iteration of this group, and be referred to by a reference inside
                    885:     the group. A failure to match might occur after the group has succeeded,
                    886:     if something later on doesn't match. For this reason, we need to restore
                    887:     the working value and also the values of the final offsets, in case they
                    888:     were set by a previous iteration of the same bracket.
                    889: 
                    890:     If there isn't enough space in the offset vector, treat this as if it were
                    891:     a non-capturing bracket. Don't worry about setting the flag for the error
                    892:     case here; that is handled in the code for KET. */
                    893: 
                    894:     case OP_CBRA:
                    895:     case OP_SCBRA:
                    896:     number = GET2(ecode, 1+LINK_SIZE);
                    897:     offset = number << 1;
                    898: 
                    899: #ifdef PCRE_DEBUG
                    900:     printf("start bracket %d\n", number);
                    901:     printf("subject=");
                    902:     pchars(eptr, 16, TRUE, md);
                    903:     printf("\n");
                    904: #endif
                    905: 
                    906:     if (offset < md->offset_max)
                    907:       {
                    908:       save_offset1 = md->offset_vector[offset];
                    909:       save_offset2 = md->offset_vector[offset+1];
                    910:       save_offset3 = md->offset_vector[md->offset_end - number];
                    911:       save_capture_last = md->capture_last;
                    912: 
                    913:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    914:       md->offset_vector[md->offset_end - number] =
                    915:         (int)(eptr - md->start_subject);
                    916: 
                    917:       for (;;)
                    918:         {
                    919:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                    920:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    921:           eptrb, RM1);
                    922:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
                    923: 
                    924:         /* If we backed up to a THEN, check whether it is within the current
                    925:         branch by comparing the address of the THEN that is passed back with
                    926:         the end of the branch. If it is within the current branch, and the
                    927:         branch is one of two or more alternatives (it either starts or ends
                    928:         with OP_ALT), we have reached the limit of THEN's action, so convert
                    929:         the return code to NOMATCH, which will cause normal backtracking to
                    930:         happen from now on. Otherwise, THEN is passed back to an outer
                    931:         alternative. This implements Perl's treatment of parenthesized groups,
                    932:         where a group not containing | does not affect the current alternative,
                    933:         that is, (X) is NOT the same as (X|(*F)). */
                    934: 
                    935:         if (rrc == MATCH_THEN)
                    936:           {
                    937:           next = ecode + GET(ecode,1);
                    938:           if (md->start_match_ptr < next &&
                    939:               (*ecode == OP_ALT || *next == OP_ALT))
                    940:             rrc = MATCH_NOMATCH;
                    941:           }
                    942: 
                    943:         /* Anything other than NOMATCH is passed back. */
                    944: 
                    945:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    946:         md->capture_last = save_capture_last;
                    947:         ecode += GET(ecode, 1);
                    948:         if (*ecode != OP_ALT) break;
                    949:         }
                    950: 
                    951:       DPRINTF(("bracket %d failed\n", number));
                    952:       md->offset_vector[offset] = save_offset1;
                    953:       md->offset_vector[offset+1] = save_offset2;
                    954:       md->offset_vector[md->offset_end - number] = save_offset3;
                    955: 
                    956:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
                    957: 
                    958:       RRETURN(rrc);
                    959:       }
                    960: 
                    961:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    962:     as a non-capturing bracket. */
                    963: 
                    964:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    965:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    966: 
                    967:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    968: 
                    969:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    970:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    971: 
                    972:     /* Non-capturing or atomic group, except for possessive with unlimited
                    973:     repeat and ONCE group with no captures. Loop for all the alternatives.
                    974: 
                    975:     When we get to the final alternative within the brackets, we used to return
                    976:     the result of a recursive call to match() whatever happened so it was
                    977:     possible to reduce stack usage by turning this into a tail recursion,
                    978:     except in the case of a possibly empty group. However, now that there is
                    979:     the possibility of (*THEN) occurring in the final alternative, this
                    980:     optimization is no longer always possible.
                    981: 
                    982:     We can optimize if we know there are no (*THEN)s in the pattern; at present
                    983:     this is the best that can be done.
                    984: 
                    985:     MATCH_ONCE is returned when the end of an atomic group is successfully
                    986:     reached, but subsequent matching fails. It passes back up the tree (causing
                    987:     captured values to be reset) until the original atomic group level is
                    988:     reached. This is tested by comparing md->once_target with the start of the
                    989:     group. At this point, the return is converted into MATCH_NOMATCH so that
                    990:     previous backup points can be taken. */
                    991: 
                    992:     case OP_ONCE:
                    993:     case OP_BRA:
                    994:     case OP_SBRA:
                    995:     DPRINTF(("start non-capturing bracket\n"));
                    996: 
                    997:     for (;;)
                    998:       {
                    999:       if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
                   1000: 
                   1001:       /* If this is not a possibly empty group, and there are no (*THEN)s in
                   1002:       the pattern, and this is the final alternative, optimize as described
                   1003:       above. */
                   1004: 
                   1005:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
                   1006:         {
                   1007:         ecode += _pcre_OP_lengths[*ecode];
                   1008:         goto TAIL_RECURSE;
                   1009:         }
                   1010: 
                   1011:       /* In all other cases, we have to make another call to match(). */
                   1012: 
                   1013:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
                   1014:         RM2);
                   1015: 
                   1016:       /* See comment in the code for capturing groups above about handling
                   1017:       THEN. */
                   1018: 
                   1019:       if (rrc == MATCH_THEN)
                   1020:         {
                   1021:         next = ecode + GET(ecode,1);
                   1022:         if (md->start_match_ptr < next &&
                   1023:             (*ecode == OP_ALT || *next == OP_ALT))
                   1024:           rrc = MATCH_NOMATCH;
                   1025:         }
                   1026: 
                   1027:       if (rrc != MATCH_NOMATCH)
                   1028:         {
                   1029:         if (rrc == MATCH_ONCE)
                   1030:           {
                   1031:           const uschar *scode = ecode;
                   1032:           if (*scode != OP_ONCE)           /* If not at start, find it */
                   1033:             {
                   1034:             while (*scode == OP_ALT) scode += GET(scode, 1);
                   1035:             scode -= GET(scode, 1);
                   1036:             }
                   1037:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
                   1038:           }
                   1039:         RRETURN(rrc);
                   1040:         }
                   1041:       ecode += GET(ecode, 1);
                   1042:       if (*ecode != OP_ALT) break;
                   1043:       }
                   1044: 
                   1045:     RRETURN(MATCH_NOMATCH);
                   1046: 
                   1047:     /* Handle possessive capturing brackets with an unlimited repeat. We come
                   1048:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
                   1049:     handled similarly to the normal case above. However, the matching is
                   1050:     different. The end of these brackets will always be OP_KETRPOS, which
                   1051:     returns MATCH_KETRPOS without going further in the pattern. By this means
                   1052:     we can handle the group by iteration rather than recursion, thereby
                   1053:     reducing the amount of stack needed. */
                   1054: 
                   1055:     case OP_CBRAPOS:
                   1056:     case OP_SCBRAPOS:
                   1057:     allow_zero = FALSE;
                   1058: 
                   1059:     POSSESSIVE_CAPTURE:
                   1060:     number = GET2(ecode, 1+LINK_SIZE);
                   1061:     offset = number << 1;
                   1062: 
                   1063: #ifdef PCRE_DEBUG
                   1064:     printf("start possessive bracket %d\n", number);
                   1065:     printf("subject=");
                   1066:     pchars(eptr, 16, TRUE, md);
                   1067:     printf("\n");
                   1068: #endif
                   1069: 
                   1070:     if (offset < md->offset_max)
                   1071:       {
                   1072:       matched_once = FALSE;
                   1073:       code_offset = (int)(ecode - md->start_code);
                   1074: 
                   1075:       save_offset1 = md->offset_vector[offset];
                   1076:       save_offset2 = md->offset_vector[offset+1];
                   1077:       save_offset3 = md->offset_vector[md->offset_end - number];
                   1078:       save_capture_last = md->capture_last;
                   1079: 
                   1080:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                   1081: 
                   1082:       /* Each time round the loop, save the current subject position for use
                   1083:       when the group matches. For MATCH_MATCH, the group has matched, so we
                   1084:       restart it with a new subject starting position, remembering that we had
                   1085:       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
                   1086:       usual. If we haven't matched any alternatives in any iteration, check to
                   1087:       see if a previous iteration matched. If so, the group has matched;
                   1088:       continue from afterwards. Otherwise it has failed; restore the previous
                   1089:       capture values before returning NOMATCH. */
                   1090: 
                   1091:       for (;;)
                   1092:         {
                   1093:         md->offset_vector[md->offset_end - number] =
                   1094:           (int)(eptr - md->start_subject);
                   1095:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                   1096:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                   1097:           eptrb, RM63);
                   1098:         if (rrc == MATCH_KETRPOS)
                   1099:           {
                   1100:           offset_top = md->end_offset_top;
                   1101:           eptr = md->end_match_ptr;
                   1102:           ecode = md->start_code + code_offset;
                   1103:           save_capture_last = md->capture_last;
                   1104:           matched_once = TRUE;
                   1105:           continue;
                   1106:           }
                   1107: 
                   1108:         /* See comment in the code for capturing groups above about handling
                   1109:         THEN. */
                   1110: 
                   1111:         if (rrc == MATCH_THEN)
                   1112:           {
                   1113:           next = ecode + GET(ecode,1);
                   1114:           if (md->start_match_ptr < next &&
                   1115:               (*ecode == OP_ALT || *next == OP_ALT))
                   1116:             rrc = MATCH_NOMATCH;
                   1117:           }
                   1118: 
                   1119:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1120:         md->capture_last = save_capture_last;
                   1121:         ecode += GET(ecode, 1);
                   1122:         if (*ecode != OP_ALT) break;
                   1123:         }
                   1124: 
                   1125:       if (!matched_once)
                   1126:         {
                   1127:         md->offset_vector[offset] = save_offset1;
                   1128:         md->offset_vector[offset+1] = save_offset2;
                   1129:         md->offset_vector[md->offset_end - number] = save_offset3;
                   1130:         }
                   1131: 
                   1132:       if (allow_zero || matched_once)
                   1133:         {
                   1134:         ecode += 1 + LINK_SIZE;
                   1135:         break;
                   1136:         }
                   1137: 
                   1138:       RRETURN(MATCH_NOMATCH);
                   1139:       }
                   1140: 
                   1141:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1142:     as a non-capturing bracket. */
                   1143: 
                   1144:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1145:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1146: 
                   1147:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1148: 
                   1149:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1150:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1151: 
                   1152:     /* Non-capturing possessive bracket with unlimited repeat. We come here
                   1153:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
                   1154:     without the capturing complication. It is written out separately for speed
                   1155:     and cleanliness. */
                   1156: 
                   1157:     case OP_BRAPOS:
                   1158:     case OP_SBRAPOS:
                   1159:     allow_zero = FALSE;
                   1160: 
                   1161:     POSSESSIVE_NON_CAPTURE:
                   1162:     matched_once = FALSE;
                   1163:     code_offset = (int)(ecode - md->start_code);
                   1164: 
                   1165:     for (;;)
                   1166:       {
                   1167:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                   1168:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                   1169:         eptrb, RM48);
                   1170:       if (rrc == MATCH_KETRPOS)
                   1171:         {
                   1172:         offset_top = md->end_offset_top;
                   1173:         eptr = md->end_match_ptr;
                   1174:         ecode = md->start_code + code_offset;
                   1175:         matched_once = TRUE;
                   1176:         continue;
                   1177:         }
                   1178: 
                   1179:       /* See comment in the code for capturing groups above about handling
                   1180:       THEN. */
                   1181: 
                   1182:       if (rrc == MATCH_THEN)
                   1183:         {
                   1184:         next = ecode + GET(ecode,1);
                   1185:         if (md->start_match_ptr < next &&
                   1186:             (*ecode == OP_ALT || *next == OP_ALT))
                   1187:           rrc = MATCH_NOMATCH;
                   1188:         }
                   1189: 
                   1190:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1191:       ecode += GET(ecode, 1);
                   1192:       if (*ecode != OP_ALT) break;
                   1193:       }
                   1194: 
                   1195:     if (matched_once || allow_zero)
                   1196:       {
                   1197:       ecode += 1 + LINK_SIZE;
                   1198:       break;
                   1199:       }
                   1200:     RRETURN(MATCH_NOMATCH);
                   1201: 
                   1202:     /* Control never reaches here. */
                   1203: 
                   1204:     /* Conditional group: compilation checked that there are no more than
                   1205:     two branches. If the condition is false, skipping the first branch takes us
                   1206:     past the end if there is only one branch, but that's OK because that is
                   1207:     exactly what going to the ket would do. */
                   1208: 
                   1209:     case OP_COND:
                   1210:     case OP_SCOND:
                   1211:     codelink = GET(ecode, 1);
                   1212: 
                   1213:     /* Because of the way auto-callout works during compile, a callout item is
                   1214:     inserted between OP_COND and an assertion condition. */
                   1215: 
                   1216:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                   1217:       {
                   1218:       if (pcre_callout != NULL)
                   1219:         {
                   1220:         pcre_callout_block cb;
                   1221:         cb.version          = 2;   /* Version 2 of the callout block */
                   1222:         cb.callout_number   = ecode[LINK_SIZE+2];
                   1223:         cb.offset_vector    = md->offset_vector;
                   1224:         cb.subject          = (PCRE_SPTR)md->start_subject;
                   1225:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1226:         cb.start_match      = (int)(mstart - md->start_subject);
                   1227:         cb.current_position = (int)(eptr - md->start_subject);
                   1228:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                   1229:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                   1230:         cb.capture_top      = offset_top/2;
                   1231:         cb.capture_last     = md->capture_last;
                   1232:         cb.callout_data     = md->callout_data;
                   1233:         cb.mark             = md->nomatch_mark;
                   1234:         if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
                   1235:         if (rrc < 0) RRETURN(rrc);
                   1236:         }
                   1237:       ecode += _pcre_OP_lengths[OP_CALLOUT];
                   1238:       }
                   1239: 
                   1240:     condcode = ecode[LINK_SIZE+1];
                   1241: 
                   1242:     /* Now see what the actual condition is */
                   1243: 
                   1244:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
                   1245:       {
                   1246:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                   1247:         {
                   1248:         condition = FALSE;
                   1249:         ecode += GET(ecode, 1);
                   1250:         }
                   1251:       else
                   1252:         {
                   1253:         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
                   1254:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
                   1255: 
                   1256:         /* If the test is for recursion into a specific subpattern, and it is
                   1257:         false, but the test was set up by name, scan the table to see if the
                   1258:         name refers to any other numbers, and test them. The condition is true
                   1259:         if any one is set. */
                   1260: 
                   1261:         if (!condition && condcode == OP_NRREF)
                   1262:           {
                   1263:           uschar *slotA = md->name_table;
                   1264:           for (i = 0; i < md->name_count; i++)
                   1265:             {
                   1266:             if (GET2(slotA, 0) == recno) break;
                   1267:             slotA += md->name_entry_size;
                   1268:             }
                   1269: 
                   1270:           /* Found a name for the number - there can be only one; duplicate
                   1271:           names for different numbers are allowed, but not vice versa. First
                   1272:           scan down for duplicates. */
                   1273: 
                   1274:           if (i < md->name_count)
                   1275:             {
                   1276:             uschar *slotB = slotA;
                   1277:             while (slotB > md->name_table)
                   1278:               {
                   1279:               slotB -= md->name_entry_size;
                   1280:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1281:                 {
                   1282:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                   1283:                 if (condition) break;
                   1284:                 }
                   1285:               else break;
                   1286:               }
                   1287: 
                   1288:             /* Scan up for duplicates */
                   1289: 
                   1290:             if (!condition)
                   1291:               {
                   1292:               slotB = slotA;
                   1293:               for (i++; i < md->name_count; i++)
                   1294:                 {
                   1295:                 slotB += md->name_entry_size;
                   1296:                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1297:                   {
                   1298:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                   1299:                   if (condition) break;
                   1300:                   }
                   1301:                 else break;
                   1302:                 }
                   1303:               }
                   1304:             }
                   1305:           }
                   1306: 
                   1307:         /* Choose branch according to the condition */
                   1308: 
                   1309:         ecode += condition? 3 : GET(ecode, 1);
                   1310:         }
                   1311:       }
                   1312: 
                   1313:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
                   1314:       {
                   1315:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1316:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                   1317: 
                   1318:       /* If the numbered capture is unset, but the reference was by name,
                   1319:       scan the table to see if the name refers to any other numbers, and test
                   1320:       them. The condition is true if any one is set. This is tediously similar
                   1321:       to the code above, but not close enough to try to amalgamate. */
                   1322: 
                   1323:       if (!condition && condcode == OP_NCREF)
                   1324:         {
                   1325:         int refno = offset >> 1;
                   1326:         uschar *slotA = md->name_table;
                   1327: 
                   1328:         for (i = 0; i < md->name_count; i++)
                   1329:           {
                   1330:           if (GET2(slotA, 0) == refno) break;
                   1331:           slotA += md->name_entry_size;
                   1332:           }
                   1333: 
                   1334:         /* Found a name for the number - there can be only one; duplicate names
                   1335:         for different numbers are allowed, but not vice versa. First scan down
                   1336:         for duplicates. */
                   1337: 
                   1338:         if (i < md->name_count)
                   1339:           {
                   1340:           uschar *slotB = slotA;
                   1341:           while (slotB > md->name_table)
                   1342:             {
                   1343:             slotB -= md->name_entry_size;
                   1344:             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1345:               {
                   1346:               offset = GET2(slotB, 0) << 1;
                   1347:               condition = offset < offset_top &&
                   1348:                 md->offset_vector[offset] >= 0;
                   1349:               if (condition) break;
                   1350:               }
                   1351:             else break;
                   1352:             }
                   1353: 
                   1354:           /* Scan up for duplicates */
                   1355: 
                   1356:           if (!condition)
                   1357:             {
                   1358:             slotB = slotA;
                   1359:             for (i++; i < md->name_count; i++)
                   1360:               {
                   1361:               slotB += md->name_entry_size;
                   1362:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1363:                 {
                   1364:                 offset = GET2(slotB, 0) << 1;
                   1365:                 condition = offset < offset_top &&
                   1366:                   md->offset_vector[offset] >= 0;
                   1367:                 if (condition) break;
                   1368:                 }
                   1369:               else break;
                   1370:               }
                   1371:             }
                   1372:           }
                   1373:         }
                   1374: 
                   1375:       /* Choose branch according to the condition */
                   1376: 
                   1377:       ecode += condition? 3 : GET(ecode, 1);
                   1378:       }
                   1379: 
                   1380:     else if (condcode == OP_DEF)     /* DEFINE - always false */
                   1381:       {
                   1382:       condition = FALSE;
                   1383:       ecode += GET(ecode, 1);
                   1384:       }
                   1385: 
                   1386:     /* The condition is an assertion. Call match() to evaluate it - setting
                   1387:     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
                   1388:     an assertion. */
                   1389: 
                   1390:     else
                   1391:       {
                   1392:       md->match_function_type = MATCH_CONDASSERT;
                   1393:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
                   1394:       if (rrc == MATCH_MATCH)
                   1395:         {
                   1396:         if (md->end_offset_top > offset_top)
                   1397:           offset_top = md->end_offset_top;  /* Captures may have happened */
                   1398:         condition = TRUE;
                   1399:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1400:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1401:         }
                   1402: 
                   1403:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
                   1404:       assertion; it is therefore treated as NOMATCH. */
                   1405: 
                   1406:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1407:         {
                   1408:         RRETURN(rrc);         /* Need braces because of following else */
                   1409:         }
                   1410:       else
                   1411:         {
                   1412:         condition = FALSE;
                   1413:         ecode += codelink;
                   1414:         }
                   1415:       }
                   1416: 
                   1417:     /* We are now at the branch that is to be obeyed. As there is only one, can
                   1418:     use tail recursion to avoid using another stack frame, except when there is
                   1419:     unlimited repeat of a possibly empty group. In the latter case, a recursive
                   1420:     call to match() is always required, unless the second alternative doesn't
                   1421:     exist, in which case we can just plough on. Note that, for compatibility
                   1422:     with Perl, the | in a conditional group is NOT treated as creating two
                   1423:     alternatives. If a THEN is encountered in the branch, it propagates out to
                   1424:     the enclosing alternative (unless nested in a deeper set of alternatives,
                   1425:     of course). */
                   1426: 
                   1427:     if (condition || *ecode == OP_ALT)
                   1428:       {
                   1429:       if (op != OP_SCOND)
                   1430:         {
                   1431:         ecode += 1 + LINK_SIZE;
                   1432:         goto TAIL_RECURSE;
                   1433:         }
                   1434: 
                   1435:       md->match_function_type = MATCH_CBEGROUP;
                   1436:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
                   1437:       RRETURN(rrc);
                   1438:       }
                   1439: 
                   1440:      /* Condition false & no alternative; continue after the group. */
                   1441: 
                   1442:     else
                   1443:       {
                   1444:       ecode += 1 + LINK_SIZE;
                   1445:       }
                   1446:     break;
                   1447: 
                   1448: 
                   1449:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1450:     to close any currently open capturing brackets. */
                   1451: 
                   1452:     case OP_CLOSE:
                   1453:     number = GET2(ecode, 1);
                   1454:     offset = number << 1;
                   1455: 
                   1456: #ifdef PCRE_DEBUG
                   1457:       printf("end bracket %d at *ACCEPT", number);
                   1458:       printf("\n");
                   1459: #endif
                   1460: 
                   1461:     md->capture_last = number;
                   1462:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1463:       {
                   1464:       md->offset_vector[offset] =
                   1465:         md->offset_vector[md->offset_end - number];
                   1466:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1467:       if (offset_top <= offset) offset_top = offset + 2;
                   1468:       }
                   1469:     ecode += 3;
                   1470:     break;
                   1471: 
                   1472: 
                   1473:     /* End of the pattern, either real or forced. */
                   1474: 
                   1475:     case OP_END:
                   1476:     case OP_ACCEPT:
                   1477:     case OP_ASSERT_ACCEPT:
                   1478: 
                   1479:     /* If we have matched an empty string, fail if not in an assertion and not
                   1480:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
                   1481:     is set and we have matched at the start of the subject. In both cases,
                   1482:     backtracking will then try other alternatives, if any. */
                   1483: 
                   1484:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
                   1485:          md->recursive == NULL &&
                   1486:          (md->notempty ||
                   1487:            (md->notempty_atstart &&
                   1488:              mstart == md->start_subject + md->start_offset)))
                   1489:       RRETURN(MATCH_NOMATCH);
                   1490: 
                   1491:     /* Otherwise, we have a match. */
                   1492: 
                   1493:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1494:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1495:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                   1496: 
                   1497:     /* For some reason, the macros don't work properly if an expression is
                   1498:     given as the argument to RRETURN when the heap is in use. */
                   1499: 
                   1500:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
                   1501:     RRETURN(rrc);
                   1502: 
                   1503:     /* Assertion brackets. Check the alternative branches in turn - the
                   1504:     matching won't pass the KET for an assertion. If any one branch matches,
                   1505:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1506:     start of each branch to move the current point backwards, so the code at
                   1507:     this level is identical to the lookahead case. When the assertion is part
                   1508:     of a condition, we want to return immediately afterwards. The caller of
                   1509:     this incarnation of the match() function will have set MATCH_CONDASSERT in
                   1510:     md->match_function_type, and one of these opcodes will be the first opcode
                   1511:     that is processed. We use a local variable that is preserved over calls to
                   1512:     match() to remember this case. */
                   1513: 
                   1514:     case OP_ASSERT:
                   1515:     case OP_ASSERTBACK:
                   1516:     if (md->match_function_type == MATCH_CONDASSERT)
                   1517:       {
                   1518:       condassert = TRUE;
                   1519:       md->match_function_type = 0;
                   1520:       }
                   1521:     else condassert = FALSE;
                   1522: 
                   1523:     do
                   1524:       {
                   1525:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
                   1526:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1527:         {
                   1528:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1529:         break;
                   1530:         }
                   1531: 
                   1532:       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
                   1533:       as NOMATCH. */
                   1534: 
                   1535:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1536:       ecode += GET(ecode, 1);
                   1537:       }
                   1538:     while (*ecode == OP_ALT);
                   1539: 
                   1540:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
                   1541: 
                   1542:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1543: 
                   1544:     if (condassert) RRETURN(MATCH_MATCH);
                   1545: 
                   1546:     /* Continue from after the assertion, updating the offsets high water
                   1547:     mark, since extracts may have been taken during the assertion. */
                   1548: 
                   1549:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1550:     ecode += 1 + LINK_SIZE;
                   1551:     offset_top = md->end_offset_top;
                   1552:     continue;
                   1553: 
                   1554:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
                   1555:     PRUNE, or COMMIT means we must assume failure without checking subsequent
                   1556:     branches. */
                   1557: 
                   1558:     case OP_ASSERT_NOT:
                   1559:     case OP_ASSERTBACK_NOT:
                   1560:     if (md->match_function_type == MATCH_CONDASSERT)
                   1561:       {
                   1562:       condassert = TRUE;
                   1563:       md->match_function_type = 0;
                   1564:       }
                   1565:     else condassert = FALSE;
                   1566: 
                   1567:     do
                   1568:       {
                   1569:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
                   1570:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
                   1571:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
                   1572:         {
                   1573:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1574:         break;
                   1575:         }
                   1576: 
                   1577:       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
                   1578:       as NOMATCH. */
                   1579: 
                   1580:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1581:       ecode += GET(ecode,1);
                   1582:       }
                   1583:     while (*ecode == OP_ALT);
                   1584: 
                   1585:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
                   1586: 
                   1587:     ecode += 1 + LINK_SIZE;
                   1588:     continue;
                   1589: 
                   1590:     /* Move the subject pointer back. This occurs only at the start of
                   1591:     each branch of a lookbehind assertion. If we are too close to the start to
                   1592:     move back, this match function fails. When working with UTF-8 we move
                   1593:     back a number of characters, not bytes. */
                   1594: 
                   1595:     case OP_REVERSE:
                   1596: #ifdef SUPPORT_UTF8
                   1597:     if (utf8)
                   1598:       {
                   1599:       i = GET(ecode, 1);
                   1600:       while (i-- > 0)
                   1601:         {
                   1602:         eptr--;
                   1603:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1604:         BACKCHAR(eptr);
                   1605:         }
                   1606:       }
                   1607:     else
                   1608: #endif
                   1609: 
                   1610:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1611: 
                   1612:       {
                   1613:       eptr -= GET(ecode, 1);
                   1614:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1615:       }
                   1616: 
                   1617:     /* Save the earliest consulted character, then skip to next op code */
                   1618: 
                   1619:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
                   1620:     ecode += 1 + LINK_SIZE;
                   1621:     break;
                   1622: 
                   1623:     /* The callout item calls an external function, if one is provided, passing
                   1624:     details of the match so far. This is mainly for debugging, though the
                   1625:     function is able to force a failure. */
                   1626: 
                   1627:     case OP_CALLOUT:
                   1628:     if (pcre_callout != NULL)
                   1629:       {
                   1630:       pcre_callout_block cb;
                   1631:       cb.version          = 2;   /* Version 2 of the callout block */
                   1632:       cb.callout_number   = ecode[1];
                   1633:       cb.offset_vector    = md->offset_vector;
                   1634:       cb.subject          = (PCRE_SPTR)md->start_subject;
                   1635:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1636:       cb.start_match      = (int)(mstart - md->start_subject);
                   1637:       cb.current_position = (int)(eptr - md->start_subject);
                   1638:       cb.pattern_position = GET(ecode, 2);
                   1639:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1640:       cb.capture_top      = offset_top/2;
                   1641:       cb.capture_last     = md->capture_last;
                   1642:       cb.callout_data     = md->callout_data;
                   1643:       cb.mark             = md->nomatch_mark;
                   1644:       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
                   1645:       if (rrc < 0) RRETURN(rrc);
                   1646:       }
                   1647:     ecode += 2 + 2*LINK_SIZE;
                   1648:     break;
                   1649: 
                   1650:     /* Recursion either matches the current regex, or some subexpression. The
                   1651:     offset data is the offset to the starting bracket from the start of the
                   1652:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1653: 
                   1654:     The state of the capturing groups is preserved over recursion, and
                   1655:     re-instated afterwards. We don't know how many are started and not yet
                   1656:     finished (offset_top records the completed total) so we just have to save
                   1657:     all the potential data. There may be up to 65535 such values, which is too
                   1658:     large to put on the stack, but using malloc for small numbers seems
                   1659:     expensive. As a compromise, the stack is used when there are no more than
                   1660:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
                   1661: 
                   1662:     There are also other values that have to be saved. We use a chained
                   1663:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1664:     for the original version of this logic. It has, however, been hacked around
                   1665:     a lot, so he is not to blame for the current way it works. */
                   1666: 
                   1667:     case OP_RECURSE:
                   1668:       {
                   1669:       recursion_info *ri;
                   1670:       int recno;
                   1671: 
                   1672:       callpat = md->start_code + GET(ecode, 1);
                   1673:       recno = (callpat == md->start_code)? 0 :
                   1674:         GET2(callpat, 1 + LINK_SIZE);
                   1675: 
                   1676:       /* Check for repeating a recursion without advancing the subject pointer.
                   1677:       This should catch convoluted mutual recursions. (Some simple cases are
                   1678:       caught at compile time.) */
                   1679: 
                   1680:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   1681:         if (recno == ri->group_num && eptr == ri->subject_position)
                   1682:           RRETURN(PCRE_ERROR_RECURSELOOP);
                   1683: 
                   1684:       /* Add to "recursing stack" */
                   1685: 
                   1686:       new_recursive.group_num = recno;
                   1687:       new_recursive.subject_position = eptr;
                   1688:       new_recursive.prevrec = md->recursive;
                   1689:       md->recursive = &new_recursive;
                   1690: 
                   1691:       /* Where to continue from afterwards */
                   1692: 
                   1693:       ecode += 1 + LINK_SIZE;
                   1694: 
                   1695:       /* Now save the offset data */
                   1696: 
                   1697:       new_recursive.saved_max = md->offset_end;
                   1698:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1699:         new_recursive.offset_save = stacksave;
                   1700:       else
                   1701:         {
                   1702:         new_recursive.offset_save =
                   1703:           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
                   1704:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1705:         }
                   1706:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1707:             new_recursive.saved_max * sizeof(int));
                   1708: 
                   1709:       /* OK, now we can do the recursion. After processing each alternative,
                   1710:       restore the offset data. If there were nested recursions, md->recursive
                   1711:       might be changed, so reset it before looping. */
                   1712: 
                   1713:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1714:       cbegroup = (*callpat >= OP_SBRA);
                   1715:       do
                   1716:         {
                   1717:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
                   1718:         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
                   1719:           md, eptrb, RM6);
                   1720:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1721:             new_recursive.saved_max * sizeof(int));
                   1722:         md->recursive = new_recursive.prevrec;
                   1723:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1724:           {
                   1725:           DPRINTF(("Recursion matched\n"));
                   1726:           if (new_recursive.offset_save != stacksave)
                   1727:             (pcre_free)(new_recursive.offset_save);
                   1728: 
                   1729:           /* Set where we got to in the subject, and reset the start in case
                   1730:           it was changed by \K. This *is* propagated back out of a recursion,
                   1731:           for Perl compatibility. */
                   1732: 
                   1733:           eptr = md->end_match_ptr;
                   1734:           mstart = md->start_match_ptr;
                   1735:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
                   1736:           }
                   1737: 
                   1738:         /* PCRE does not allow THEN to escape beyond a recursion; it is treated
                   1739:         as NOMATCH. */
                   1740: 
                   1741:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1742:           {
                   1743:           DPRINTF(("Recursion gave error %d\n", rrc));
                   1744:           if (new_recursive.offset_save != stacksave)
                   1745:             (pcre_free)(new_recursive.offset_save);
                   1746:           RRETURN(rrc);
                   1747:           }
                   1748: 
                   1749:         md->recursive = &new_recursive;
                   1750:         callpat += GET(callpat, 1);
                   1751:         }
                   1752:       while (*callpat == OP_ALT);
                   1753: 
                   1754:       DPRINTF(("Recursion didn't match\n"));
                   1755:       md->recursive = new_recursive.prevrec;
                   1756:       if (new_recursive.offset_save != stacksave)
                   1757:         (pcre_free)(new_recursive.offset_save);
                   1758:       RRETURN(MATCH_NOMATCH);
                   1759:       }
                   1760: 
                   1761:     RECURSION_MATCHED:
                   1762:     break;
                   1763: 
                   1764:     /* An alternation is the end of a branch; scan along to find the end of the
                   1765:     bracketed group and go to there. */
                   1766: 
                   1767:     case OP_ALT:
                   1768:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1769:     break;
                   1770: 
                   1771:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1772:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1773:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1774:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1775:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1776: 
                   1777:     case OP_BRAZERO:
                   1778:     next = ecode + 1;
                   1779:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
                   1780:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1781:     do next += GET(next, 1); while (*next == OP_ALT);
                   1782:     ecode = next + 1 + LINK_SIZE;
                   1783:     break;
                   1784: 
                   1785:     case OP_BRAMINZERO:
                   1786:     next = ecode + 1;
                   1787:     do next += GET(next, 1); while (*next == OP_ALT);
                   1788:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
                   1789:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1790:     ecode++;
                   1791:     break;
                   1792: 
                   1793:     case OP_SKIPZERO:
                   1794:     next = ecode+1;
                   1795:     do next += GET(next,1); while (*next == OP_ALT);
                   1796:     ecode = next + 1 + LINK_SIZE;
                   1797:     break;
                   1798: 
                   1799:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
                   1800:     here; just jump to the group, with allow_zero set TRUE. */
                   1801: 
                   1802:     case OP_BRAPOSZERO:
                   1803:     op = *(++ecode);
                   1804:     allow_zero = TRUE;
                   1805:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
                   1806:       goto POSSESSIVE_NON_CAPTURE;
                   1807: 
                   1808:     /* End of a group, repeated or non-repeating. */
                   1809: 
                   1810:     case OP_KET:
                   1811:     case OP_KETRMIN:
                   1812:     case OP_KETRMAX:
                   1813:     case OP_KETRPOS:
                   1814:     prev = ecode - GET(ecode, 1);
                   1815: 
                   1816:     /* If this was a group that remembered the subject start, in order to break
                   1817:     infinite repeats of empty string matches, retrieve the subject start from
                   1818:     the chain. Otherwise, set it NULL. */
                   1819: 
                   1820:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
                   1821:       {
                   1822:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1823:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1824:       }
                   1825:     else saved_eptr = NULL;
                   1826: 
                   1827:     /* If we are at the end of an assertion group or a non-capturing atomic
                   1828:     group, stop matching and return MATCH_MATCH, but record the current high
                   1829:     water mark for use by positive assertions. We also need to record the match
                   1830:     start in case it was changed by \K. */
                   1831: 
                   1832:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
                   1833:          *prev == OP_ONCE_NC)
                   1834:       {
                   1835:       md->end_match_ptr = eptr;      /* For ONCE_NC */
                   1836:       md->end_offset_top = offset_top;
                   1837:       md->start_match_ptr = mstart;
                   1838:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
                   1839:       }
                   1840: 
                   1841:     /* For capturing groups we have to check the group number back at the start
                   1842:     and if necessary complete handling an extraction by setting the offsets and
                   1843:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
                   1844:     into group 0, so it won't be picked up here. Instead, we catch it when the
                   1845:     OP_END is reached. Other recursion is handled here. We just have to record
                   1846:     the current subject position and start match pointer and give a MATCH
                   1847:     return. */
                   1848: 
                   1849:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
                   1850:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
                   1851:       {
                   1852:       number = GET2(prev, 1+LINK_SIZE);
                   1853:       offset = number << 1;
                   1854: 
                   1855: #ifdef PCRE_DEBUG
                   1856:       printf("end bracket %d", number);
                   1857:       printf("\n");
                   1858: #endif
                   1859: 
                   1860:       /* Handle a recursively called group. */
                   1861: 
                   1862:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1863:         {
                   1864:         md->end_match_ptr = eptr;
                   1865:         md->start_match_ptr = mstart;
                   1866:         RRETURN(MATCH_MATCH);
                   1867:         }
                   1868: 
                   1869:       /* Deal with capturing */
                   1870: 
                   1871:       md->capture_last = number;
                   1872:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1873:         {
                   1874:         /* If offset is greater than offset_top, it means that we are
                   1875:         "skipping" a capturing group, and that group's offsets must be marked
                   1876:         unset. In earlier versions of PCRE, all the offsets were unset at the
                   1877:         start of matching, but this doesn't work because atomic groups and
                   1878:         assertions can cause a value to be set that should later be unset.
                   1879:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
                   1880:         part of the atomic group, but this is not on the final matching path,
                   1881:         so must be unset when 2 is set. (If there is no group 2, there is no
                   1882:         problem, because offset_top will then be 2, indicating no capture.) */
                   1883: 
                   1884:         if (offset > offset_top)
                   1885:           {
                   1886:           register int *iptr = md->offset_vector + offset_top;
                   1887:           register int *iend = md->offset_vector + offset;
                   1888:           while (iptr < iend) *iptr++ = -1;
                   1889:           }
                   1890: 
                   1891:         /* Now make the extraction */
                   1892: 
                   1893:         md->offset_vector[offset] =
                   1894:           md->offset_vector[md->offset_end - number];
                   1895:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1896:         if (offset_top <= offset) offset_top = offset + 2;
                   1897:         }
                   1898:       }
                   1899: 
                   1900:     /* For an ordinary non-repeating ket, just continue at this level. This
                   1901:     also happens for a repeating ket if no characters were matched in the
                   1902:     group. This is the forcible breaking of infinite loops as implemented in
                   1903:     Perl 5.005. For a non-repeating atomic group that includes captures,
                   1904:     establish a backup point by processing the rest of the pattern at a lower
                   1905:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
                   1906:     original OP_ONCE level, thereby bypassing intermediate backup points, but
                   1907:     resetting any captures that happened along the way. */
                   1908: 
                   1909:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1910:       {
                   1911:       if (*prev == OP_ONCE)
                   1912:         {
                   1913:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
                   1914:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1915:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   1916:         RRETURN(MATCH_ONCE);
                   1917:         }
                   1918:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
                   1919:       break;
                   1920:       }
                   1921: 
                   1922:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
                   1923:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
                   1924:     at a time from the outer level, thus saving stack. */
                   1925: 
                   1926:     if (*ecode == OP_KETRPOS)
                   1927:       {
                   1928:       md->end_match_ptr = eptr;
                   1929:       md->end_offset_top = offset_top;
                   1930:       RRETURN(MATCH_KETRPOS);
                   1931:       }
                   1932: 
                   1933:     /* The normal repeating kets try the rest of the pattern or restart from
                   1934:     the preceding bracket, in the appropriate order. In the second case, we can
                   1935:     use tail recursion to avoid using another stack frame, unless we have an
                   1936:     atomic group or an unlimited repeat of a group that can match an empty
                   1937:     string. */
                   1938: 
                   1939:     if (*ecode == OP_KETRMIN)
                   1940:       {
                   1941:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
                   1942:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1943:       if (*prev == OP_ONCE)
                   1944:         {
                   1945:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
                   1946:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1947:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   1948:         RRETURN(MATCH_ONCE);
                   1949:         }
                   1950:       if (*prev >= OP_SBRA)    /* Could match an empty string */
                   1951:         {
                   1952:         md->match_function_type = MATCH_CBEGROUP;
                   1953:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
                   1954:         RRETURN(rrc);
                   1955:         }
                   1956:       ecode = prev;
                   1957:       goto TAIL_RECURSE;
                   1958:       }
                   1959:     else  /* OP_KETRMAX */
                   1960:       {
                   1961:       if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                   1962:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
                   1963:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
                   1964:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1965:       if (*prev == OP_ONCE)
                   1966:         {
                   1967:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
                   1968:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1969:         md->once_target = prev;
                   1970:         RRETURN(MATCH_ONCE);
                   1971:         }
                   1972:       ecode += 1 + LINK_SIZE;
                   1973:       goto TAIL_RECURSE;
                   1974:       }
                   1975:     /* Control never gets here */
                   1976: 
                   1977:     /* Not multiline mode: start of subject assertion, unless notbol. */
                   1978: 
                   1979:     case OP_CIRC:
                   1980:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   1981: 
                   1982:     /* Start of subject assertion */
                   1983: 
                   1984:     case OP_SOD:
                   1985:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
                   1986:     ecode++;
                   1987:     break;
                   1988: 
                   1989:     /* Multiline mode: start of subject unless notbol, or after any newline. */
                   1990: 
                   1991:     case OP_CIRCM:
                   1992:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   1993:     if (eptr != md->start_subject &&
                   1994:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   1995:       RRETURN(MATCH_NOMATCH);
                   1996:     ecode++;
                   1997:     break;
                   1998: 
                   1999:     /* Start of match assertion */
                   2000: 
                   2001:     case OP_SOM:
                   2002:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
                   2003:     ecode++;
                   2004:     break;
                   2005: 
                   2006:     /* Reset the start of match point */
                   2007: 
                   2008:     case OP_SET_SOM:
                   2009:     mstart = eptr;
                   2010:     ecode++;
                   2011:     break;
                   2012: 
                   2013:     /* Multiline mode: assert before any newline, or before end of subject
                   2014:     unless noteol is set. */
                   2015: 
                   2016:     case OP_DOLLM:
                   2017:     if (eptr < md->end_subject)
                   2018:       { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
                   2019:     else
                   2020:       {
                   2021:       if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2022:       SCHECK_PARTIAL();
                   2023:       }
                   2024:     ecode++;
                   2025:     break;
                   2026: 
                   2027:     /* Not multiline mode: assert before a terminating newline or before end of
                   2028:     subject unless noteol is set. */
                   2029: 
                   2030:     case OP_DOLL:
                   2031:     if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2032:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
                   2033: 
                   2034:     /* ... else fall through for endonly */
                   2035: 
                   2036:     /* End of subject assertion (\z) */
                   2037: 
                   2038:     case OP_EOD:
                   2039:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
                   2040:     SCHECK_PARTIAL();
                   2041:     ecode++;
                   2042:     break;
                   2043: 
                   2044:     /* End of subject or ending \n assertion (\Z) */
                   2045: 
                   2046:     case OP_EODN:
                   2047:     ASSERT_NL_OR_EOS:
                   2048:     if (eptr < md->end_subject &&
                   2049:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
                   2050:       RRETURN(MATCH_NOMATCH);
                   2051: 
                   2052:     /* Either at end of string or \n before end. */
                   2053: 
                   2054:     SCHECK_PARTIAL();
                   2055:     ecode++;
                   2056:     break;
                   2057: 
                   2058:     /* Word boundary assertions */
                   2059: 
                   2060:     case OP_NOT_WORD_BOUNDARY:
                   2061:     case OP_WORD_BOUNDARY:
                   2062:       {
                   2063: 
                   2064:       /* Find out if the previous and current characters are "word" characters.
                   2065:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   2066:       be "non-word" characters. Remember the earliest consulted character for
                   2067:       partial matching. */
                   2068: 
                   2069: #ifdef SUPPORT_UTF8
                   2070:       if (utf8)
                   2071:         {
                   2072:         /* Get status of previous character */
                   2073: 
                   2074:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2075:           {
                   2076:           USPTR lastptr = eptr - 1;
                   2077:           while((*lastptr & 0xc0) == 0x80) lastptr--;
                   2078:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
                   2079:           GETCHAR(c, lastptr);
                   2080: #ifdef SUPPORT_UCP
                   2081:           if (md->use_ucp)
                   2082:             {
                   2083:             if (c == '_') prev_is_word = TRUE; else
                   2084:               {
                   2085:               int cat = UCD_CATEGORY(c);
                   2086:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2087:               }
                   2088:             }
                   2089:           else
                   2090: #endif
                   2091:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2092:           }
                   2093: 
                   2094:         /* Get status of next character */
                   2095: 
                   2096:         if (eptr >= md->end_subject)
                   2097:           {
                   2098:           SCHECK_PARTIAL();
                   2099:           cur_is_word = FALSE;
                   2100:           }
                   2101:         else
                   2102:           {
                   2103:           GETCHAR(c, eptr);
                   2104: #ifdef SUPPORT_UCP
                   2105:           if (md->use_ucp)
                   2106:             {
                   2107:             if (c == '_') cur_is_word = TRUE; else
                   2108:               {
                   2109:               int cat = UCD_CATEGORY(c);
                   2110:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2111:               }
                   2112:             }
                   2113:           else
                   2114: #endif
                   2115:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2116:           }
                   2117:         }
                   2118:       else
                   2119: #endif
                   2120: 
                   2121:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   2122:       consistency with the behaviour of \w we do use it in this case. */
                   2123: 
                   2124:         {
                   2125:         /* Get status of previous character */
                   2126: 
                   2127:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2128:           {
                   2129:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   2130: #ifdef SUPPORT_UCP
                   2131:           if (md->use_ucp)
                   2132:             {
                   2133:             c = eptr[-1];
                   2134:             if (c == '_') prev_is_word = TRUE; else
                   2135:               {
                   2136:               int cat = UCD_CATEGORY(c);
                   2137:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2138:               }
                   2139:             }
                   2140:           else
                   2141: #endif
                   2142:           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
                   2143:           }
                   2144: 
                   2145:         /* Get status of next character */
                   2146: 
                   2147:         if (eptr >= md->end_subject)
                   2148:           {
                   2149:           SCHECK_PARTIAL();
                   2150:           cur_is_word = FALSE;
                   2151:           }
                   2152:         else
                   2153: #ifdef SUPPORT_UCP
                   2154:         if (md->use_ucp)
                   2155:           {
                   2156:           c = *eptr;
                   2157:           if (c == '_') cur_is_word = TRUE; else
                   2158:             {
                   2159:             int cat = UCD_CATEGORY(c);
                   2160:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2161:             }
                   2162:           }
                   2163:         else
                   2164: #endif
                   2165:         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
                   2166:         }
                   2167: 
                   2168:       /* Now see if the situation is what we want */
                   2169: 
                   2170:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   2171:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
                   2172:         RRETURN(MATCH_NOMATCH);
                   2173:       }
                   2174:     break;
                   2175: 
                   2176:     /* Match a single character type; inline for speed */
                   2177: 
                   2178:     case OP_ANY:
                   2179:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   2180:     /* Fall through */
                   2181: 
                   2182:     case OP_ALLANY:
                   2183:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2184:       {                            /* not be updated before SCHECK_PARTIAL. */
                   2185:       SCHECK_PARTIAL();
                   2186:       RRETURN(MATCH_NOMATCH);
                   2187:       }
                   2188:     eptr++;
                   2189:     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   2190:     ecode++;
                   2191:     break;
                   2192: 
                   2193:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   2194:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   2195: 
                   2196:     case OP_ANYBYTE:
                   2197:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2198:       {                            /* not be updated before SCHECK_PARTIAL. */
                   2199:       SCHECK_PARTIAL();
                   2200:       RRETURN(MATCH_NOMATCH);
                   2201:       }
                   2202:     eptr++;
                   2203:     ecode++;
                   2204:     break;
                   2205: 
                   2206:     case OP_NOT_DIGIT:
                   2207:     if (eptr >= md->end_subject)
                   2208:       {
                   2209:       SCHECK_PARTIAL();
                   2210:       RRETURN(MATCH_NOMATCH);
                   2211:       }
                   2212:     GETCHARINCTEST(c, eptr);
                   2213:     if (
                   2214: #ifdef SUPPORT_UTF8
                   2215:        c < 256 &&
                   2216: #endif
                   2217:        (md->ctypes[c] & ctype_digit) != 0
                   2218:        )
                   2219:       RRETURN(MATCH_NOMATCH);
                   2220:     ecode++;
                   2221:     break;
                   2222: 
                   2223:     case OP_DIGIT:
                   2224:     if (eptr >= md->end_subject)
                   2225:       {
                   2226:       SCHECK_PARTIAL();
                   2227:       RRETURN(MATCH_NOMATCH);
                   2228:       }
                   2229:     GETCHARINCTEST(c, eptr);
                   2230:     if (
                   2231: #ifdef SUPPORT_UTF8
                   2232:        c >= 256 ||
                   2233: #endif
                   2234:        (md->ctypes[c] & ctype_digit) == 0
                   2235:        )
                   2236:       RRETURN(MATCH_NOMATCH);
                   2237:     ecode++;
                   2238:     break;
                   2239: 
                   2240:     case OP_NOT_WHITESPACE:
                   2241:     if (eptr >= md->end_subject)
                   2242:       {
                   2243:       SCHECK_PARTIAL();
                   2244:       RRETURN(MATCH_NOMATCH);
                   2245:       }
                   2246:     GETCHARINCTEST(c, eptr);
                   2247:     if (
                   2248: #ifdef SUPPORT_UTF8
                   2249:        c < 256 &&
                   2250: #endif
                   2251:        (md->ctypes[c] & ctype_space) != 0
                   2252:        )
                   2253:       RRETURN(MATCH_NOMATCH);
                   2254:     ecode++;
                   2255:     break;
                   2256: 
                   2257:     case OP_WHITESPACE:
                   2258:     if (eptr >= md->end_subject)
                   2259:       {
                   2260:       SCHECK_PARTIAL();
                   2261:       RRETURN(MATCH_NOMATCH);
                   2262:       }
                   2263:     GETCHARINCTEST(c, eptr);
                   2264:     if (
                   2265: #ifdef SUPPORT_UTF8
                   2266:        c >= 256 ||
                   2267: #endif
                   2268:        (md->ctypes[c] & ctype_space) == 0
                   2269:        )
                   2270:       RRETURN(MATCH_NOMATCH);
                   2271:     ecode++;
                   2272:     break;
                   2273: 
                   2274:     case OP_NOT_WORDCHAR:
                   2275:     if (eptr >= md->end_subject)
                   2276:       {
                   2277:       SCHECK_PARTIAL();
                   2278:       RRETURN(MATCH_NOMATCH);
                   2279:       }
                   2280:     GETCHARINCTEST(c, eptr);
                   2281:     if (
                   2282: #ifdef SUPPORT_UTF8
                   2283:        c < 256 &&
                   2284: #endif
                   2285:        (md->ctypes[c] & ctype_word) != 0
                   2286:        )
                   2287:       RRETURN(MATCH_NOMATCH);
                   2288:     ecode++;
                   2289:     break;
                   2290: 
                   2291:     case OP_WORDCHAR:
                   2292:     if (eptr >= md->end_subject)
                   2293:       {
                   2294:       SCHECK_PARTIAL();
                   2295:       RRETURN(MATCH_NOMATCH);
                   2296:       }
                   2297:     GETCHARINCTEST(c, eptr);
                   2298:     if (
                   2299: #ifdef SUPPORT_UTF8
                   2300:        c >= 256 ||
                   2301: #endif
                   2302:        (md->ctypes[c] & ctype_word) == 0
                   2303:        )
                   2304:       RRETURN(MATCH_NOMATCH);
                   2305:     ecode++;
                   2306:     break;
                   2307: 
                   2308:     case OP_ANYNL:
                   2309:     if (eptr >= md->end_subject)
                   2310:       {
                   2311:       SCHECK_PARTIAL();
                   2312:       RRETURN(MATCH_NOMATCH);
                   2313:       }
                   2314:     GETCHARINCTEST(c, eptr);
                   2315:     switch(c)
                   2316:       {
                   2317:       default: RRETURN(MATCH_NOMATCH);
                   2318: 
                   2319:       case 0x000d:
                   2320:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   2321:       break;
                   2322: 
                   2323:       case 0x000a:
                   2324:       break;
                   2325: 
                   2326:       case 0x000b:
                   2327:       case 0x000c:
                   2328:       case 0x0085:
                   2329:       case 0x2028:
                   2330:       case 0x2029:
                   2331:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   2332:       break;
                   2333:       }
                   2334:     ecode++;
                   2335:     break;
                   2336: 
                   2337:     case OP_NOT_HSPACE:
                   2338:     if (eptr >= md->end_subject)
                   2339:       {
                   2340:       SCHECK_PARTIAL();
                   2341:       RRETURN(MATCH_NOMATCH);
                   2342:       }
                   2343:     GETCHARINCTEST(c, eptr);
                   2344:     switch(c)
                   2345:       {
                   2346:       default: break;
                   2347:       case 0x09:      /* HT */
                   2348:       case 0x20:      /* SPACE */
                   2349:       case 0xa0:      /* NBSP */
                   2350:       case 0x1680:    /* OGHAM SPACE MARK */
                   2351:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2352:       case 0x2000:    /* EN QUAD */
                   2353:       case 0x2001:    /* EM QUAD */
                   2354:       case 0x2002:    /* EN SPACE */
                   2355:       case 0x2003:    /* EM SPACE */
                   2356:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2357:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2358:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2359:       case 0x2007:    /* FIGURE SPACE */
                   2360:       case 0x2008:    /* PUNCTUATION SPACE */
                   2361:       case 0x2009:    /* THIN SPACE */
                   2362:       case 0x200A:    /* HAIR SPACE */
                   2363:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2364:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2365:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2366:       RRETURN(MATCH_NOMATCH);
                   2367:       }
                   2368:     ecode++;
                   2369:     break;
                   2370: 
                   2371:     case OP_HSPACE:
                   2372:     if (eptr >= md->end_subject)
                   2373:       {
                   2374:       SCHECK_PARTIAL();
                   2375:       RRETURN(MATCH_NOMATCH);
                   2376:       }
                   2377:     GETCHARINCTEST(c, eptr);
                   2378:     switch(c)
                   2379:       {
                   2380:       default: RRETURN(MATCH_NOMATCH);
                   2381:       case 0x09:      /* HT */
                   2382:       case 0x20:      /* SPACE */
                   2383:       case 0xa0:      /* NBSP */
                   2384:       case 0x1680:    /* OGHAM SPACE MARK */
                   2385:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2386:       case 0x2000:    /* EN QUAD */
                   2387:       case 0x2001:    /* EM QUAD */
                   2388:       case 0x2002:    /* EN SPACE */
                   2389:       case 0x2003:    /* EM SPACE */
                   2390:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2391:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2392:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2393:       case 0x2007:    /* FIGURE SPACE */
                   2394:       case 0x2008:    /* PUNCTUATION SPACE */
                   2395:       case 0x2009:    /* THIN SPACE */
                   2396:       case 0x200A:    /* HAIR SPACE */
                   2397:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2398:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2399:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2400:       break;
                   2401:       }
                   2402:     ecode++;
                   2403:     break;
                   2404: 
                   2405:     case OP_NOT_VSPACE:
                   2406:     if (eptr >= md->end_subject)
                   2407:       {
                   2408:       SCHECK_PARTIAL();
                   2409:       RRETURN(MATCH_NOMATCH);
                   2410:       }
                   2411:     GETCHARINCTEST(c, eptr);
                   2412:     switch(c)
                   2413:       {
                   2414:       default: break;
                   2415:       case 0x0a:      /* LF */
                   2416:       case 0x0b:      /* VT */
                   2417:       case 0x0c:      /* FF */
                   2418:       case 0x0d:      /* CR */
                   2419:       case 0x85:      /* NEL */
                   2420:       case 0x2028:    /* LINE SEPARATOR */
                   2421:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2422:       RRETURN(MATCH_NOMATCH);
                   2423:       }
                   2424:     ecode++;
                   2425:     break;
                   2426: 
                   2427:     case OP_VSPACE:
                   2428:     if (eptr >= md->end_subject)
                   2429:       {
                   2430:       SCHECK_PARTIAL();
                   2431:       RRETURN(MATCH_NOMATCH);
                   2432:       }
                   2433:     GETCHARINCTEST(c, eptr);
                   2434:     switch(c)
                   2435:       {
                   2436:       default: RRETURN(MATCH_NOMATCH);
                   2437:       case 0x0a:      /* LF */
                   2438:       case 0x0b:      /* VT */
                   2439:       case 0x0c:      /* FF */
                   2440:       case 0x0d:      /* CR */
                   2441:       case 0x85:      /* NEL */
                   2442:       case 0x2028:    /* LINE SEPARATOR */
                   2443:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2444:       break;
                   2445:       }
                   2446:     ecode++;
                   2447:     break;
                   2448: 
                   2449: #ifdef SUPPORT_UCP
                   2450:     /* Check the next character by Unicode property. We will get here only
                   2451:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2452: 
                   2453:     case OP_PROP:
                   2454:     case OP_NOTPROP:
                   2455:     if (eptr >= md->end_subject)
                   2456:       {
                   2457:       SCHECK_PARTIAL();
                   2458:       RRETURN(MATCH_NOMATCH);
                   2459:       }
                   2460:     GETCHARINCTEST(c, eptr);
                   2461:       {
                   2462:       const ucd_record *prop = GET_UCD(c);
                   2463: 
                   2464:       switch(ecode[1])
                   2465:         {
                   2466:         case PT_ANY:
                   2467:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
                   2468:         break;
                   2469: 
                   2470:         case PT_LAMP:
                   2471:         if ((prop->chartype == ucp_Lu ||
                   2472:              prop->chartype == ucp_Ll ||
                   2473:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
                   2474:           RRETURN(MATCH_NOMATCH);
                   2475:         break;
                   2476: 
                   2477:         case PT_GC:
                   2478:         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
                   2479:           RRETURN(MATCH_NOMATCH);
                   2480:         break;
                   2481: 
                   2482:         case PT_PC:
                   2483:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
                   2484:           RRETURN(MATCH_NOMATCH);
                   2485:         break;
                   2486: 
                   2487:         case PT_SC:
                   2488:         if ((ecode[2] != prop->script) == (op == OP_PROP))
                   2489:           RRETURN(MATCH_NOMATCH);
                   2490:         break;
                   2491: 
                   2492:         /* These are specials */
                   2493: 
                   2494:         case PT_ALNUM:
                   2495:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   2496:              _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
                   2497:           RRETURN(MATCH_NOMATCH);
                   2498:         break;
                   2499: 
                   2500:         case PT_SPACE:    /* Perl space */
                   2501:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   2502:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2503:                == (op == OP_NOTPROP))
                   2504:           RRETURN(MATCH_NOMATCH);
                   2505:         break;
                   2506: 
                   2507:         case PT_PXSPACE:  /* POSIX space */
                   2508:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   2509:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2510:              c == CHAR_FF || c == CHAR_CR)
                   2511:                == (op == OP_NOTPROP))
                   2512:           RRETURN(MATCH_NOMATCH);
                   2513:         break;
                   2514: 
                   2515:         case PT_WORD:
                   2516:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   2517:              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   2518:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
                   2519:           RRETURN(MATCH_NOMATCH);
                   2520:         break;
                   2521: 
                   2522:         /* This should never occur */
                   2523: 
                   2524:         default:
                   2525:         RRETURN(PCRE_ERROR_INTERNAL);
                   2526:         }
                   2527: 
                   2528:       ecode += 3;
                   2529:       }
                   2530:     break;
                   2531: 
                   2532:     /* Match an extended Unicode sequence. We will get here only if the support
                   2533:     is in the binary; otherwise a compile-time error occurs. */
                   2534: 
                   2535:     case OP_EXTUNI:
                   2536:     if (eptr >= md->end_subject)
                   2537:       {
                   2538:       SCHECK_PARTIAL();
                   2539:       RRETURN(MATCH_NOMATCH);
                   2540:       }
                   2541:     GETCHARINCTEST(c, eptr);
                   2542:     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
                   2543:     while (eptr < md->end_subject)
                   2544:       {
                   2545:       int len = 1;
                   2546:       if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   2547:       if (UCD_CATEGORY(c) != ucp_M) break;
                   2548:       eptr += len;
                   2549:       }
                   2550:     ecode++;
                   2551:     break;
                   2552: #endif
                   2553: 
                   2554: 
                   2555:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2556:     item to see if there is repeat information following. The code is similar
                   2557:     to that for character classes, but repeated for efficiency. Then obey
                   2558:     similar code to character type repeats - written out again for speed.
                   2559:     However, if the referenced string is the empty string, always treat
                   2560:     it as matched, any number of times (otherwise there could be infinite
                   2561:     loops). */
                   2562: 
                   2563:     case OP_REF:
                   2564:     case OP_REFI:
                   2565:     caseless = op == OP_REFI;
                   2566:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   2567:     ecode += 3;
                   2568: 
                   2569:     /* If the reference is unset, there are two possibilities:
                   2570: 
                   2571:     (a) In the default, Perl-compatible state, set the length negative;
                   2572:     this ensures that every attempt at a match fails. We can't just fail
                   2573:     here, because of the possibility of quantifiers with zero minima.
                   2574: 
                   2575:     (b) If the JavaScript compatibility flag is set, set the length to zero
                   2576:     so that the back reference matches an empty string.
                   2577: 
                   2578:     Otherwise, set the length to the length of what was matched by the
                   2579:     referenced subpattern. */
                   2580: 
                   2581:     if (offset >= offset_top || md->offset_vector[offset] < 0)
                   2582:       length = (md->jscript_compat)? 0 : -1;
                   2583:     else
                   2584:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2585: 
                   2586:     /* Set up for repetition, or handle the non-repeated case */
                   2587: 
                   2588:     switch (*ecode)
                   2589:       {
                   2590:       case OP_CRSTAR:
                   2591:       case OP_CRMINSTAR:
                   2592:       case OP_CRPLUS:
                   2593:       case OP_CRMINPLUS:
                   2594:       case OP_CRQUERY:
                   2595:       case OP_CRMINQUERY:
                   2596:       c = *ecode++ - OP_CRSTAR;
                   2597:       minimize = (c & 1) != 0;
                   2598:       min = rep_min[c];                 /* Pick up values from tables; */
                   2599:       max = rep_max[c];                 /* zero for max => infinity */
                   2600:       if (max == 0) max = INT_MAX;
                   2601:       break;
                   2602: 
                   2603:       case OP_CRRANGE:
                   2604:       case OP_CRMINRANGE:
                   2605:       minimize = (*ecode == OP_CRMINRANGE);
                   2606:       min = GET2(ecode, 1);
                   2607:       max = GET2(ecode, 3);
                   2608:       if (max == 0) max = INT_MAX;
                   2609:       ecode += 5;
                   2610:       break;
                   2611: 
                   2612:       default:               /* No repeat follows */
                   2613:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2614:         {
                   2615:         CHECK_PARTIAL();
                   2616:         RRETURN(MATCH_NOMATCH);
                   2617:         }
                   2618:       eptr += length;
                   2619:       continue;              /* With the main loop */
                   2620:       }
                   2621: 
                   2622:     /* Handle repeated back references. A zero-length reference matches any
                   2623:     number of times, and an unset reference (length < 0) can only "match" zero
                   2624:     times; with min == 0 the greedy loop's eptr -= length would advance eptr. */
                   2625:     if (length == 0 || (length < 0 && min == 0)) continue;
                   2626: 
                   2627:     /* First, ensure the minimum number of matches are present. We get back
                   2628:     the length of the reference string explicitly rather than passing the
                   2629:     address of eptr, so that eptr can be a register variable. */
                   2630: 
                   2631:     for (i = 1; i <= min; i++)
                   2632:       {
                   2633:       int slength;
                   2634:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2635:         {
                   2636:         CHECK_PARTIAL();
                   2637:         RRETURN(MATCH_NOMATCH);
                   2638:         }
                   2639:       eptr += slength;
                   2640:       }
                   2641: 
                   2642:     /* If min = max, continue at the same level without recursion.
                   2643:     They are not both allowed to be zero. */
                   2644: 
                   2645:     if (min == max) continue;
                   2646: 
                   2647:     /* If minimizing, keep trying and advancing the pointer */
                   2648: 
                   2649:     if (minimize)
                   2650:       {
                   2651:       for (fi = min;; fi++)
                   2652:         {
                   2653:         int slength;
                   2654:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
                   2655:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2656:         if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2657:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2658:           {
                   2659:           CHECK_PARTIAL();
                   2660:           RRETURN(MATCH_NOMATCH);
                   2661:           }
                   2662:         eptr += slength;
                   2663:         }
                   2664:       /* Control never gets here */
                   2665:       }
                   2666: 
                   2667:     /* If maximizing, find the longest string and work backwards */
                   2668: 
                   2669:     else
                   2670:       {
                   2671:       pp = eptr;
                   2672:       for (i = min; i < max; i++)
                   2673:         {
                   2674:         int slength;
                   2675:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2676:           {
                   2677:           CHECK_PARTIAL();
                   2678:           break;
                   2679:           }
                   2680:         eptr += slength;
                   2681:         }
                   2682:       while (eptr >= pp)
                   2683:         {
                   2684:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
                   2685:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2686:         eptr -= length;
                   2687:         }
                   2688:       RRETURN(MATCH_NOMATCH);
                   2689:       }
                   2690:     /* Control never gets here */
                   2691: 
                   2692:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2693:     used when all the characters in the class have values in the range 0-255,
                   2694:     and either the matching is caseful, or the characters are in the range
                   2695:     0-127 when UTF-8 processing is enabled. The only difference between
                   2696:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2697:     encountered.
                   2698: 
                   2699:     First, look past the end of the item to see if there is repeat information
                   2700:     following. Then obey similar code to character type repeats - written out
                   2701:     again for speed. */
                   2702: 
                   2703:     case OP_NCLASS:
                   2704:     case OP_CLASS:
                   2705:       {
                   2706:       data = ecode + 1;                /* Save for matching */
                   2707:       ecode += 33;                     /* Advance past the item */
                   2708: 
                   2709:       switch (*ecode)
                   2710:         {
                   2711:         case OP_CRSTAR:
                   2712:         case OP_CRMINSTAR:
                   2713:         case OP_CRPLUS:
                   2714:         case OP_CRMINPLUS:
                   2715:         case OP_CRQUERY:
                   2716:         case OP_CRMINQUERY:
                   2717:         c = *ecode++ - OP_CRSTAR;
                   2718:         minimize = (c & 1) != 0;
                   2719:         min = rep_min[c];                 /* Pick up values from tables; */
                   2720:         max = rep_max[c];                 /* zero for max => infinity */
                   2721:         if (max == 0) max = INT_MAX;
                   2722:         break;
                   2723: 
                   2724:         case OP_CRRANGE:
                   2725:         case OP_CRMINRANGE:
                   2726:         minimize = (*ecode == OP_CRMINRANGE);
                   2727:         min = GET2(ecode, 1);
                   2728:         max = GET2(ecode, 3);
                   2729:         if (max == 0) max = INT_MAX;
                   2730:         ecode += 5;
                   2731:         break;
                   2732: 
                   2733:         default:               /* No repeat follows */
                   2734:         min = max = 1;
                   2735:         break;
                   2736:         }
                   2737: 
                   2738:       /* First, ensure the minimum number of matches are present. */
                   2739: 
                   2740: #ifdef SUPPORT_UTF8
                   2741:       /* UTF-8 mode */
                   2742:       if (utf8)
                   2743:         {
                   2744:         for (i = 1; i <= min; i++)
                   2745:           {
                   2746:           if (eptr >= md->end_subject)
                   2747:             {
                   2748:             SCHECK_PARTIAL();
                   2749:             RRETURN(MATCH_NOMATCH);
                   2750:             }
                   2751:           GETCHARINC(c, eptr);
                   2752:           if (c > 255)
                   2753:             {
                   2754:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2755:             }
                   2756:           else
                   2757:             {
                   2758:             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   2759:             }
                   2760:           }
                   2761:         }
                   2762:       else
                   2763: #endif
                   2764:       /* Not UTF-8 mode */
                   2765:         {
                   2766:         for (i = 1; i <= min; i++)
                   2767:           {
                   2768:           if (eptr >= md->end_subject)
                   2769:             {
                   2770:             SCHECK_PARTIAL();
                   2771:             RRETURN(MATCH_NOMATCH);
                   2772:             }
                   2773:           c = *eptr++;
                   2774:           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   2775:           }
                   2776:         }
                   2777: 
                   2778:       /* If max == min we can continue with the main loop without the
                   2779:       need to recurse. */
                   2780: 
                   2781:       if (min == max) continue;
                   2782: 
                   2783:       /* If minimizing, keep testing the rest of the expression and advancing
                   2784:       the pointer while it matches the class. */
                   2785: 
                   2786:       if (minimize)
                   2787:         {
                   2788: #ifdef SUPPORT_UTF8
                   2789:         /* UTF-8 mode */
                   2790:         if (utf8)
                   2791:           {
                   2792:           for (fi = min;; fi++)
                   2793:             {
                   2794:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
                   2795:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2796:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2797:             if (eptr >= md->end_subject)
                   2798:               {
                   2799:               SCHECK_PARTIAL();
                   2800:               RRETURN(MATCH_NOMATCH);
                   2801:               }
                   2802:             GETCHARINC(c, eptr);
                   2803:             if (c > 255)
                   2804:               {
                   2805:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2806:               }
                   2807:             else
                   2808:               {
                   2809:               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   2810:               }
                   2811:             }
                   2812:           }
                   2813:         else
                   2814: #endif
                   2815:         /* Not UTF-8 mode */
                   2816:           {
                   2817:           for (fi = min;; fi++)
                   2818:             {
                   2819:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
                   2820:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2821:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2822:             if (eptr >= md->end_subject)
                   2823:               {
                   2824:               SCHECK_PARTIAL();
                   2825:               RRETURN(MATCH_NOMATCH);
                   2826:               }
                   2827:             c = *eptr++;
                   2828:             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   2829:             }
                   2830:           }
                   2831:         /* Control never gets here */
                   2832:         }
                   2833: 
                   2834:       /* If maximizing, find the longest possible run, then work backwards. */
                   2835: 
                   2836:       else
                   2837:         {
                   2838:         pp = eptr;
                   2839: 
                   2840: #ifdef SUPPORT_UTF8
                   2841:         /* UTF-8 mode */
                   2842:         if (utf8)
                   2843:           {
                   2844:           for (i = min; i < max; i++)
                   2845:             {
                   2846:             int len = 1;
                   2847:             if (eptr >= md->end_subject)
                   2848:               {
                   2849:               SCHECK_PARTIAL();
                   2850:               break;
                   2851:               }
                   2852:             GETCHARLEN(c, eptr, len);
                   2853:             if (c > 255)
                   2854:               {
                   2855:               if (op == OP_CLASS) break;
                   2856:               }
                   2857:             else
                   2858:               {
                   2859:               if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2860:               }
                   2861:             eptr += len;
                   2862:             }
                   2863:           for (;;)
                   2864:             {
                   2865:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
                   2866:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2867:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2868:             BACKCHAR(eptr);
                   2869:             }
                   2870:           }
                   2871:         else
                   2872: #endif
                   2873:           /* Not UTF-8 mode */
                   2874:           {
                   2875:           for (i = min; i < max; i++)
                   2876:             {
                   2877:             if (eptr >= md->end_subject)
                   2878:               {
                   2879:               SCHECK_PARTIAL();
                   2880:               break;
                   2881:               }
                   2882:             c = *eptr;
                   2883:             if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2884:             eptr++;
                   2885:             }
                   2886:           while (eptr >= pp)
                   2887:             {
                   2888:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
                   2889:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2890:             eptr--;
                   2891:             }
                   2892:           }
                   2893: 
                   2894:         RRETURN(MATCH_NOMATCH);
                   2895:         }
                   2896:       }
                   2897:     /* Control never gets here */
                   2898: 
                   2899: 
                   2900:     /* Match an extended character class. This opcode is encountered only
                   2901:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   2902:     mode, because Unicode properties are supported in non-UTF-8 mode. */
                   2903: 
                   2904: #ifdef SUPPORT_UTF8
                   2905:     case OP_XCLASS:
                   2906:       {
                   2907:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2908:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2909: 
                   2910:       switch (*ecode)
                   2911:         {
                   2912:         case OP_CRSTAR:
                   2913:         case OP_CRMINSTAR:
                   2914:         case OP_CRPLUS:
                   2915:         case OP_CRMINPLUS:
                   2916:         case OP_CRQUERY:
                   2917:         case OP_CRMINQUERY:
                   2918:         c = *ecode++ - OP_CRSTAR;
                   2919:         minimize = (c & 1) != 0;
                   2920:         min = rep_min[c];                 /* Pick up values from tables; */
                   2921:         max = rep_max[c];                 /* zero for max => infinity */
                   2922:         if (max == 0) max = INT_MAX;
                   2923:         break;
                   2924: 
                   2925:         case OP_CRRANGE:
                   2926:         case OP_CRMINRANGE:
                   2927:         minimize = (*ecode == OP_CRMINRANGE);
                   2928:         min = GET2(ecode, 1);
                   2929:         max = GET2(ecode, 3);
                   2930:         if (max == 0) max = INT_MAX;
                   2931:         ecode += 5;
                   2932:         break;
                   2933: 
                   2934:         default:               /* No repeat follows */
                   2935:         min = max = 1;
                   2936:         break;
                   2937:         }
                   2938: 
                   2939:       /* First, ensure the minimum number of matches are present. */
                   2940: 
                   2941:       for (i = 1; i <= min; i++)
                   2942:         {
                   2943:         if (eptr >= md->end_subject)
                   2944:           {
                   2945:           SCHECK_PARTIAL();
                   2946:           RRETURN(MATCH_NOMATCH);
                   2947:           }
                   2948:         GETCHARINCTEST(c, eptr);
                   2949:         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
                   2950:         }
                   2951: 
                   2952:       /* If max == min we can continue with the main loop without the
                   2953:       need to recurse. */
                   2954: 
                   2955:       if (min == max) continue;
                   2956: 
                   2957:       /* If minimizing, keep testing the rest of the expression and advancing
                   2958:       the pointer while it matches the class. */
                   2959: 
                   2960:       if (minimize)
                   2961:         {
                   2962:         for (fi = min;; fi++)
                   2963:           {
                   2964:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
                   2965:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2966:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2967:           if (eptr >= md->end_subject)
                   2968:             {
                   2969:             SCHECK_PARTIAL();
                   2970:             RRETURN(MATCH_NOMATCH);
                   2971:             }
                   2972:           GETCHARINCTEST(c, eptr);
                   2973:           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
                   2974:           }
                   2975:         /* Control never gets here */
                   2976:         }
                   2977: 
                   2978:       /* If maximizing, find the longest possible run, then work backwards. */
                   2979: 
                   2980:       else
                   2981:         {
                   2982:         pp = eptr;
                   2983:         for (i = min; i < max; i++)
                   2984:           {
                   2985:           int len = 1;
                   2986:           if (eptr >= md->end_subject)
                   2987:             {
                   2988:             SCHECK_PARTIAL();
                   2989:             break;
                   2990:             }
                   2991:           GETCHARLENTEST(c, eptr, len);
                   2992:           if (!_pcre_xclass(c, data)) break;
                   2993:           eptr += len;
                   2994:           }
                   2995:         for(;;)
                   2996:           {
                   2997:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
                   2998:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2999:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3000:           if (utf8) BACKCHAR(eptr);
                   3001:           }
                   3002:         RRETURN(MATCH_NOMATCH);
                   3003:         }
                   3004: 
                   3005:       /* Control never gets here */
                   3006:       }
                   3007: #endif    /* End of XCLASS */
                   3008: 
                   3009:     /* Match a single character, casefully */
                   3010: 
                   3011:     case OP_CHAR:
                   3012: #ifdef SUPPORT_UTF8
                   3013:     if (utf8)
                   3014:       {
                   3015:       length = 1;
                   3016:       ecode++;
                   3017:       GETCHARLEN(fc, ecode, length);
                   3018:       if (length > md->end_subject - eptr)
                   3019:         {
                   3020:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   3021:         RRETURN(MATCH_NOMATCH);
                   3022:         }
                   3023:       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
                   3024:       }
                   3025:     else
                   3026: #endif
                   3027: 
                   3028:     /* Non-UTF-8 mode */
                   3029:       {
                   3030:       if (md->end_subject - eptr < 1)
                   3031:         {
                   3032:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   3033:         RRETURN(MATCH_NOMATCH);
                   3034:         }
                   3035:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
                   3036:       ecode += 2;
                   3037:       }
                   3038:     break;
                   3039: 
                   3040:     /* Match a single character, caselessly. If we are at the end of the
                   3041:     subject, give up immediately. */
                   3042: 
                   3043:     case OP_CHARI:
                   3044:     if (eptr >= md->end_subject)
                   3045:       {
                   3046:       SCHECK_PARTIAL();
                   3047:       RRETURN(MATCH_NOMATCH);
                   3048:       }
                   3049: 
                   3050: #ifdef SUPPORT_UTF8
                   3051:     if (utf8)
                   3052:       {
                   3053:       length = 1;
                   3054:       ecode++;
                   3055:       GETCHARLEN(fc, ecode, length);
                   3056: 
                   3057:       /* If the pattern character's value is < 128, we have only one byte, and
                   3058:       we know that its other case must also be one byte long, so we can use the
                   3059:       fast lookup table. We know that there is at least one byte left in the
                   3060:       subject. */
                   3061: 
                   3062:       if (fc < 128)
                   3063:         {
                   3064:         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   3065:         }
                   3066: 
                   3067:       /* Otherwise we must pick up the subject character. Note that we cannot
                   3068:       use the value of "length" to check for sufficient bytes left, because the
                   3069:       other case of the character may have more or fewer bytes.  */
                   3070: 
                   3071:       else
                   3072:         {
                   3073:         unsigned int dc;
                   3074:         GETCHARINC(dc, eptr);
                   3075:         ecode += length;
                   3076: 
                   3077:         /* If we have Unicode property support, we can use it to test the other
                   3078:         case of the character, if there is one. */
                   3079: 
                   3080:         if (fc != dc)
                   3081:           {
                   3082: #ifdef SUPPORT_UCP
                   3083:           if (dc != UCD_OTHERCASE(fc))
                   3084: #endif
                   3085:             RRETURN(MATCH_NOMATCH);
                   3086:           }
                   3087:         }
                   3088:       }
                   3089:     else
                   3090: #endif   /* SUPPORT_UTF8 */
                   3091: 
                   3092:     /* Non-UTF-8 mode */
                   3093:       {
                   3094:       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   3095:       ecode += 2;
                   3096:       }
                   3097:     break;
                   3098: 
                   3099:     /* Match a single character repeatedly. */
                   3100: 
                   3101:     case OP_EXACT:
                   3102:     case OP_EXACTI:
                   3103:     min = max = GET2(ecode, 1);
                   3104:     ecode += 3;
                   3105:     goto REPEATCHAR;
                   3106: 
                   3107:     case OP_POSUPTO:
                   3108:     case OP_POSUPTOI:
                   3109:     possessive = TRUE;
                   3110:     /* Fall through */
                   3111: 
                   3112:     case OP_UPTO:
                   3113:     case OP_UPTOI:
                   3114:     case OP_MINUPTO:
                   3115:     case OP_MINUPTOI:
                   3116:     min = 0;
                   3117:     max = GET2(ecode, 1);
                   3118:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
                   3119:     ecode += 3;
                   3120:     goto REPEATCHAR;
                   3121: 
                   3122:     case OP_POSSTAR:
                   3123:     case OP_POSSTARI:
                   3124:     possessive = TRUE;
                   3125:     min = 0;
                   3126:     max = INT_MAX;
                   3127:     ecode++;
                   3128:     goto REPEATCHAR;
                   3129: 
                   3130:     case OP_POSPLUS:
                   3131:     case OP_POSPLUSI:
                   3132:     possessive = TRUE;
                   3133:     min = 1;
                   3134:     max = INT_MAX;
                   3135:     ecode++;
                   3136:     goto REPEATCHAR;
                   3137: 
                   3138:     case OP_POSQUERY:
                   3139:     case OP_POSQUERYI:
                   3140:     possessive = TRUE;
                   3141:     min = 0;
                   3142:     max = 1;
                   3143:     ecode++;
                   3144:     goto REPEATCHAR;
                   3145: 
                   3146:     case OP_STAR:
                   3147:     case OP_STARI:
                   3148:     case OP_MINSTAR:
                   3149:     case OP_MINSTARI:
                   3150:     case OP_PLUS:
                   3151:     case OP_PLUSI:
                   3152:     case OP_MINPLUS:
                   3153:     case OP_MINPLUSI:
                   3154:     case OP_QUERY:
                   3155:     case OP_QUERYI:
                   3156:     case OP_MINQUERY:
                   3157:     case OP_MINQUERYI:
                   3158:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
                   3159:     minimize = (c & 1) != 0;
                   3160:     min = rep_min[c];                 /* Pick up values from tables; */
                   3161:     max = rep_max[c];                 /* zero for max => infinity */
                   3162:     if (max == 0) max = INT_MAX;
                   3163: 
                   3164:     /* Common code for all repeated single-character matches. */
                   3165: 
                   3166:     REPEATCHAR:
                   3167: #ifdef SUPPORT_UTF8
                   3168:     if (utf8)
                   3169:       {
                   3170:       length = 1;
                   3171:       charptr = ecode;
                   3172:       GETCHARLEN(fc, ecode, length);
                   3173:       ecode += length;
                   3174: 
                   3175:       /* Handle multibyte character matching specially here. There is
                   3176:       support for caseless matching if UCP support is present. */
                   3177: 
                   3178:       if (length > 1)
                   3179:         {
                   3180: #ifdef SUPPORT_UCP
                   3181:         unsigned int othercase;
                   3182:         if (op >= OP_STARI &&     /* Caseless */
                   3183:             (othercase = UCD_OTHERCASE(fc)) != fc)
                   3184:           oclength = _pcre_ord2utf8(othercase, occhars);
                   3185:         else oclength = 0;
                   3186: #endif  /* SUPPORT_UCP */
                   3187: 
                   3188:         for (i = 1; i <= min; i++)
                   3189:           {
                   3190:           if (eptr <= md->end_subject - length &&
                   3191:             memcmp(eptr, charptr, length) == 0) eptr += length;
                   3192: #ifdef SUPPORT_UCP
                   3193:           else if (oclength > 0 &&
                   3194:                    eptr <= md->end_subject - oclength &&
                   3195:                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   3196: #endif  /* SUPPORT_UCP */
                   3197:           else
                   3198:             {
                   3199:             CHECK_PARTIAL();
                   3200:             RRETURN(MATCH_NOMATCH);
                   3201:             }
                   3202:           }
                   3203: 
                   3204:         if (min == max) continue;
                   3205: 
                   3206:         if (minimize)
                   3207:           {
                   3208:           for (fi = min;; fi++)
                   3209:             {
                   3210:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
                   3211:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3212:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3213:             if (eptr <= md->end_subject - length &&
                   3214:               memcmp(eptr, charptr, length) == 0) eptr += length;
                   3215: #ifdef SUPPORT_UCP
                   3216:             else if (oclength > 0 &&
                   3217:                      eptr <= md->end_subject - oclength &&
                   3218:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   3219: #endif  /* SUPPORT_UCP */
                   3220:             else
                   3221:               {
                   3222:               CHECK_PARTIAL();
                   3223:               RRETURN(MATCH_NOMATCH);
                   3224:               }
                   3225:             }
                   3226:           /* Control never gets here */
                   3227:           }
                   3228: 
                   3229:         else  /* Maximize */
                   3230:           {
                   3231:           pp = eptr;
                   3232:           for (i = min; i < max; i++)
                   3233:             {
                   3234:             if (eptr <= md->end_subject - length &&
                   3235:                 memcmp(eptr, charptr, length) == 0) eptr += length;
                   3236: #ifdef SUPPORT_UCP
                   3237:             else if (oclength > 0 &&
                   3238:                      eptr <= md->end_subject - oclength &&
                   3239:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   3240: #endif  /* SUPPORT_UCP */
                   3241:             else
                   3242:               {
                   3243:               CHECK_PARTIAL();
                   3244:               break;
                   3245:               }
                   3246:             }
                   3247: 
                   3248:           if (possessive) continue;
                   3249: 
                   3250:           for(;;)
                   3251:             {
                   3252:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
                   3253:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3254:             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
                   3255: #ifdef SUPPORT_UCP
                   3256:             eptr--;
                   3257:             BACKCHAR(eptr);
                   3258: #else   /* without SUPPORT_UCP */
                   3259:             eptr -= length;
                   3260: #endif  /* SUPPORT_UCP */
                   3261:             }
                   3262:           }
                   3263:         /* Control never gets here */
                   3264:         }
                   3265: 
                   3266:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   3267:       obey the code as for non-UTF-8 characters below, though in this case the
                   3268:       value of fc will always be < 128. */
                   3269:       }
                   3270:     else
                   3271: #endif  /* SUPPORT_UTF8 */
                   3272: 
                   3273:     /* When not in UTF-8 mode, load a single-byte character. */
                   3274: 
                   3275:     fc = *ecode++;
                   3276: 
                   3277:     /* The value of fc at this point is always less than 256, though we may or
                   3278:     may not be in UTF-8 mode. The code is duplicated for the caseless and
                   3279:     caseful cases, for speed, since matching characters is likely to be quite
                   3280:     common. First, ensure the minimum number of matches are present. If min =
                   3281:     max, continue at the same level without recursing. Otherwise, if
                   3282:     minimizing, keep trying the rest of the expression and advancing one
                   3283:     matching character if failing, up to the maximum. Alternatively, if
                   3284:     maximizing, find the maximum number of characters and work backwards. */
                   3285: 
                   3286:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3287:       max, eptr));
                   3288: 
                   3289:     if (op >= OP_STARI)  /* Caseless */
                   3290:       {
                   3291:       fc = md->lcc[fc];
                   3292:       for (i = 1; i <= min; i++)
                   3293:         {
                   3294:         if (eptr >= md->end_subject)
                   3295:           {
                   3296:           SCHECK_PARTIAL();
                   3297:           RRETURN(MATCH_NOMATCH);
                   3298:           }
                   3299:         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   3300:         }
                   3301:       if (min == max) continue;
                   3302:       if (minimize)
                   3303:         {
                   3304:         for (fi = min;; fi++)
                   3305:           {
                   3306:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
                   3307:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3308:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3309:           if (eptr >= md->end_subject)
                   3310:             {
                   3311:             SCHECK_PARTIAL();
                   3312:             RRETURN(MATCH_NOMATCH);
                   3313:             }
                   3314:           if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   3315:           }
                   3316:         /* Control never gets here */
                   3317:         }
                   3318:       else  /* Maximize */
                   3319:         {
                   3320:         pp = eptr;
                   3321:         for (i = min; i < max; i++)
                   3322:           {
                   3323:           if (eptr >= md->end_subject)
                   3324:             {
                   3325:             SCHECK_PARTIAL();
                   3326:             break;
                   3327:             }
                   3328:           if (fc != md->lcc[*eptr]) break;
                   3329:           eptr++;
                   3330:           }
                   3331: 
                   3332:         if (possessive) continue;
                   3333: 
                   3334:         while (eptr >= pp)
                   3335:           {
                   3336:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
                   3337:           eptr--;
                   3338:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3339:           }
                   3340:         RRETURN(MATCH_NOMATCH);
                   3341:         }
                   3342:       /* Control never gets here */
                   3343:       }
                   3344: 
                   3345:     /* Caseful comparisons (includes all multi-byte characters) */
                   3346: 
                   3347:     else
                   3348:       {
                   3349:       for (i = 1; i <= min; i++)
                   3350:         {
                   3351:         if (eptr >= md->end_subject)
                   3352:           {
                   3353:           SCHECK_PARTIAL();
                   3354:           RRETURN(MATCH_NOMATCH);
                   3355:           }
                   3356:         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
                   3357:         }
                   3358: 
                   3359:       if (min == max) continue;
                   3360: 
                   3361:       if (minimize)
                   3362:         {
                   3363:         for (fi = min;; fi++)
                   3364:           {
                   3365:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
                   3366:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3367:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3368:           if (eptr >= md->end_subject)
                   3369:             {
                   3370:             SCHECK_PARTIAL();
                   3371:             RRETURN(MATCH_NOMATCH);
                   3372:             }
                   3373:           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
                   3374:           }
                   3375:         /* Control never gets here */
                   3376:         }
                   3377:       else  /* Maximize */
                   3378:         {
                   3379:         pp = eptr;
                   3380:         for (i = min; i < max; i++)
                   3381:           {
                   3382:           if (eptr >= md->end_subject)
                   3383:             {
                   3384:             SCHECK_PARTIAL();
                   3385:             break;
                   3386:             }
                   3387:           if (fc != *eptr) break;
                   3388:           eptr++;
                   3389:           }
                   3390:         if (possessive) continue;
                   3391: 
                   3392:         while (eptr >= pp)
                   3393:           {
                   3394:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
                   3395:           eptr--;
                   3396:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3397:           }
                   3398:         RRETURN(MATCH_NOMATCH);
                   3399:         }
                   3400:       }
                   3401:     /* Control never gets here */
                   3402: 
                   3403:     /* Match a negated single one-byte character. The character we are
                   3404:     checking can be multibyte. */
                   3405: 
                   3406:     case OP_NOT:
                   3407:     case OP_NOTI:
                   3408:     if (eptr >= md->end_subject)
                   3409:       {
                   3410:       SCHECK_PARTIAL();
                   3411:       RRETURN(MATCH_NOMATCH);
                   3412:       }
                   3413:     ecode++;
                   3414:     GETCHARINCTEST(c, eptr);
                   3415:     if (op == OP_NOTI)         /* The caseless case */
                   3416:       {
                   3417: #ifdef SUPPORT_UTF8
                   3418:       if (c < 256)
                   3419: #endif
                   3420:       c = md->lcc[c];
                   3421:       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
                   3422:       }
                   3423:     else    /* Caseful */
                   3424:       {
                   3425:       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
                   3426:       }
                   3427:     break;
                   3428: 
                   3429:     /* Match a negated single one-byte character repeatedly. This is almost a
                   3430:     repeat of the code for a repeated single character, but I haven't found a
                   3431:     nice way of commoning these up that doesn't require a test of the
                   3432:     positive/negative option for each character match. Maybe that wouldn't add
                   3433:     very much to the time taken, but character matching *is* what this is all
                   3434:     about... */
                   3435: 
                   3436:     case OP_NOTEXACT:
                   3437:     case OP_NOTEXACTI:
                   3438:     min = max = GET2(ecode, 1);
                   3439:     ecode += 3;
                   3440:     goto REPEATNOTCHAR;
                   3441: 
                   3442:     case OP_NOTUPTO:
                   3443:     case OP_NOTUPTOI:
                   3444:     case OP_NOTMINUPTO:
                   3445:     case OP_NOTMINUPTOI:
                   3446:     min = 0;
                   3447:     max = GET2(ecode, 1);
                   3448:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
                   3449:     ecode += 3;
                   3450:     goto REPEATNOTCHAR;
                   3451: 
                   3452:     case OP_NOTPOSSTAR:
                   3453:     case OP_NOTPOSSTARI:
                   3454:     possessive = TRUE;
                   3455:     min = 0;
                   3456:     max = INT_MAX;
                   3457:     ecode++;
                   3458:     goto REPEATNOTCHAR;
                   3459: 
                   3460:     case OP_NOTPOSPLUS:
                   3461:     case OP_NOTPOSPLUSI:
                   3462:     possessive = TRUE;
                   3463:     min = 1;
                   3464:     max = INT_MAX;
                   3465:     ecode++;
                   3466:     goto REPEATNOTCHAR;
                   3467: 
                   3468:     case OP_NOTPOSQUERY:
                   3469:     case OP_NOTPOSQUERYI:
                   3470:     possessive = TRUE;
                   3471:     min = 0;
                   3472:     max = 1;
                   3473:     ecode++;
                   3474:     goto REPEATNOTCHAR;
                   3475: 
                   3476:     case OP_NOTPOSUPTO:
                   3477:     case OP_NOTPOSUPTOI:
                   3478:     possessive = TRUE;
                   3479:     min = 0;
                   3480:     max = GET2(ecode, 1);
                   3481:     ecode += 3;
                   3482:     goto REPEATNOTCHAR;
                   3483: 
                   3484:     case OP_NOTSTAR:
                   3485:     case OP_NOTSTARI:
                   3486:     case OP_NOTMINSTAR:
                   3487:     case OP_NOTMINSTARI:
                   3488:     case OP_NOTPLUS:
                   3489:     case OP_NOTPLUSI:
                   3490:     case OP_NOTMINPLUS:
                   3491:     case OP_NOTMINPLUSI:
                   3492:     case OP_NOTQUERY:
                   3493:     case OP_NOTQUERYI:
                   3494:     case OP_NOTMINQUERY:
                   3495:     case OP_NOTMINQUERYI:
                   3496:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
                   3497:     minimize = (c & 1) != 0;
                   3498:     min = rep_min[c];                 /* Pick up values from tables; */
                   3499:     max = rep_max[c];                 /* zero for max => infinity */
                   3500:     if (max == 0) max = INT_MAX;
                   3501: 
                   3502:     /* Common code for all repeated single-byte matches. */
                   3503: 
                   3504:     REPEATNOTCHAR:
                   3505:     fc = *ecode++;
                   3506: 
                   3507:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3508:     since matching characters is likely to be quite common. First, ensure the
                   3509:     minimum number of matches are present. If min = max, continue at the same
                   3510:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3511:     the expression and advancing one matching character if failing, up to the
                   3512:     maximum. Alternatively, if maximizing, find the maximum number of
                   3513:     characters and work backwards. */
                   3514: 
                   3515:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3516:       max, eptr));
                   3517: 
                   3518:     if (op >= OP_NOTSTARI)     /* Caseless */
                   3519:       {
                   3520:       fc = md->lcc[fc];
                   3521: 
                   3522: #ifdef SUPPORT_UTF8
                   3523:       /* UTF-8 mode */
                   3524:       if (utf8)
                   3525:         {
                   3526:         register unsigned int d;
                   3527:         for (i = 1; i <= min; i++)
                   3528:           {
                   3529:           if (eptr >= md->end_subject)
                   3530:             {
                   3531:             SCHECK_PARTIAL();
                   3532:             RRETURN(MATCH_NOMATCH);
                   3533:             }
                   3534:           GETCHARINC(d, eptr);
                   3535:           if (d < 256) d = md->lcc[d];
                   3536:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   3537:           }
                   3538:         }
                   3539:       else
                   3540: #endif
                   3541: 
                   3542:       /* Not UTF-8 mode */
                   3543:         {
                   3544:         for (i = 1; i <= min; i++)
                   3545:           {
                   3546:           if (eptr >= md->end_subject)
                   3547:             {
                   3548:             SCHECK_PARTIAL();
                   3549:             RRETURN(MATCH_NOMATCH);
                   3550:             }
                   3551:           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   3552:           }
                   3553:         }
                   3554: 
                   3555:       if (min == max) continue;
                   3556: 
                   3557:       if (minimize)
                   3558:         {
                   3559: #ifdef SUPPORT_UTF8
                   3560:         /* UTF-8 mode */
                   3561:         if (utf8)
                   3562:           {
                   3563:           register unsigned int d;
                   3564:           for (fi = min;; fi++)
                   3565:             {
                   3566:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
                   3567:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3568:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3569:             if (eptr >= md->end_subject)
                   3570:               {
                   3571:               SCHECK_PARTIAL();
                   3572:               RRETURN(MATCH_NOMATCH);
                   3573:               }
                   3574:             GETCHARINC(d, eptr);
                   3575:             if (d < 256) d = md->lcc[d];
                   3576:             if (fc == d) RRETURN(MATCH_NOMATCH);
                   3577:             }
                   3578:           }
                   3579:         else
                   3580: #endif
                   3581:         /* Not UTF-8 mode */
                   3582:           {
                   3583:           for (fi = min;; fi++)
                   3584:             {
                   3585:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
                   3586:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3587:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3588:             if (eptr >= md->end_subject)
                   3589:               {
                   3590:               SCHECK_PARTIAL();
                   3591:               RRETURN(MATCH_NOMATCH);
                   3592:               }
                   3593:             if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   3594:             }
                   3595:           }
                   3596:         /* Control never gets here */
                   3597:         }
                   3598: 
                   3599:       /* Maximize case */
                   3600: 
                   3601:       else
                   3602:         {
                   3603:         pp = eptr;
                   3604: 
                   3605: #ifdef SUPPORT_UTF8
                   3606:         /* UTF-8 mode */
                   3607:         if (utf8)
                   3608:           {
                   3609:           register unsigned int d;
                   3610:           for (i = min; i < max; i++)
                   3611:             {
                   3612:             int len = 1;
                   3613:             if (eptr >= md->end_subject)
                   3614:               {
                   3615:               SCHECK_PARTIAL();
                   3616:               break;
                   3617:               }
                   3618:             GETCHARLEN(d, eptr, len);
                   3619:             if (d < 256) d = md->lcc[d];
                   3620:             if (fc == d) break;
                   3621:             eptr += len;
                   3622:             }
                   3623:         if (possessive) continue;
                   3624:         for(;;)
                   3625:             {
                   3626:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
                   3627:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3628:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3629:             BACKCHAR(eptr);
                   3630:             }
                   3631:           }
                   3632:         else
                   3633: #endif
                   3634:         /* Not UTF-8 mode */
                   3635:           {
                   3636:           for (i = min; i < max; i++)
                   3637:             {
                   3638:             if (eptr >= md->end_subject)
                   3639:               {
                   3640:               SCHECK_PARTIAL();
                   3641:               break;
                   3642:               }
                   3643:             if (fc == md->lcc[*eptr]) break;
                   3644:             eptr++;
                   3645:             }
                   3646:           if (possessive) continue;
                   3647:           while (eptr >= pp)
                   3648:             {
                   3649:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
                   3650:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3651:             eptr--;
                   3652:             }
                   3653:           }
                   3654: 
                   3655:         RRETURN(MATCH_NOMATCH);
                   3656:         }
                   3657:       /* Control never gets here */
                   3658:       }
                   3659: 
                   3660:     /* Caseful comparisons */
                   3661: 
                   3662:     else
                   3663:       {
                   3664: #ifdef SUPPORT_UTF8
                   3665:       /* UTF-8 mode */
                   3666:       if (utf8)
                   3667:         {
                   3668:         register unsigned int d;
                   3669:         for (i = 1; i <= min; i++)
                   3670:           {
                   3671:           if (eptr >= md->end_subject)
                   3672:             {
                   3673:             SCHECK_PARTIAL();
                   3674:             RRETURN(MATCH_NOMATCH);
                   3675:             }
                   3676:           GETCHARINC(d, eptr);
                   3677:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   3678:           }
                   3679:         }
                   3680:       else
                   3681: #endif
                   3682:       /* Not UTF-8 mode */
                   3683:         {
                   3684:         for (i = 1; i <= min; i++)
                   3685:           {
                   3686:           if (eptr >= md->end_subject)
                   3687:             {
                   3688:             SCHECK_PARTIAL();
                   3689:             RRETURN(MATCH_NOMATCH);
                   3690:             }
                   3691:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   3692:           }
                   3693:         }
                   3694: 
                   3695:       if (min == max) continue;
                   3696: 
                   3697:       if (minimize)
                   3698:         {
                   3699: #ifdef SUPPORT_UTF8
                   3700:         /* UTF-8 mode */
                   3701:         if (utf8)
                   3702:           {
                   3703:           register unsigned int d;
                   3704:           for (fi = min;; fi++)
                   3705:             {
                   3706:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
                   3707:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3708:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3709:             if (eptr >= md->end_subject)
                   3710:               {
                   3711:               SCHECK_PARTIAL();
                   3712:               RRETURN(MATCH_NOMATCH);
                   3713:               }
                   3714:             GETCHARINC(d, eptr);
                   3715:             if (fc == d) RRETURN(MATCH_NOMATCH);
                   3716:             }
                   3717:           }
                   3718:         else
                   3719: #endif
                   3720:         /* Not UTF-8 mode */
                   3721:           {
                   3722:           for (fi = min;; fi++)
                   3723:             {
                   3724:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
                   3725:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3726:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3727:             if (eptr >= md->end_subject)
                   3728:               {
                   3729:               SCHECK_PARTIAL();
                   3730:               RRETURN(MATCH_NOMATCH);
                   3731:               }
                   3732:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   3733:             }
                   3734:           }
                   3735:         /* Control never gets here */
                   3736:         }
                   3737: 
                   3738:       /* Maximize case */
                   3739: 
                   3740:       else
                   3741:         {
                   3742:         pp = eptr;
                   3743: 
                   3744: #ifdef SUPPORT_UTF8
                   3745:         /* UTF-8 mode */
                   3746:         if (utf8)
                   3747:           {
                   3748:           register unsigned int d;
                   3749:           for (i = min; i < max; i++)
                   3750:             {
                   3751:             int len = 1;
                   3752:             if (eptr >= md->end_subject)
                   3753:               {
                   3754:               SCHECK_PARTIAL();
                   3755:               break;
                   3756:               }
                   3757:             GETCHARLEN(d, eptr, len);
                   3758:             if (fc == d) break;
                   3759:             eptr += len;
                   3760:             }
                   3761:           if (possessive) continue;
                   3762:           for(;;)
                   3763:             {
                   3764:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
                   3765:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3766:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3767:             BACKCHAR(eptr);
                   3768:             }
                   3769:           }
                   3770:         else
                   3771: #endif
                   3772:         /* Not UTF-8 mode */
                   3773:           {
                   3774:           for (i = min; i < max; i++)
                   3775:             {
                   3776:             if (eptr >= md->end_subject)
                   3777:               {
                   3778:               SCHECK_PARTIAL();
                   3779:               break;
                   3780:               }
                   3781:             if (fc == *eptr) break;
                   3782:             eptr++;
                   3783:             }
                   3784:           if (possessive) continue;
                   3785:           while (eptr >= pp)
                   3786:             {
                   3787:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
                   3788:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3789:             eptr--;
                   3790:             }
                   3791:           }
                   3792: 
                   3793:         RRETURN(MATCH_NOMATCH);
                   3794:         }
                   3795:       }
                   3796:     /* Control never gets here */
                   3797: 
                   3798:     /* Match a single character type repeatedly; several different opcodes
                   3799:     share code. This is very similar to the code for single characters, but we
                   3800:     repeat it in the interests of efficiency. */
                   3801: 
                   3802:     case OP_TYPEEXACT:
                   3803:     min = max = GET2(ecode, 1);
                   3804:     minimize = TRUE;
                   3805:     ecode += 3;
                   3806:     goto REPEATTYPE;
                   3807: 
                   3808:     case OP_TYPEUPTO:
                   3809:     case OP_TYPEMINUPTO:
                   3810:     min = 0;
                   3811:     max = GET2(ecode, 1);
                   3812:     minimize = *ecode == OP_TYPEMINUPTO;
                   3813:     ecode += 3;
                   3814:     goto REPEATTYPE;
                   3815: 
                   3816:     case OP_TYPEPOSSTAR:
                   3817:     possessive = TRUE;
                   3818:     min = 0;
                   3819:     max = INT_MAX;
                   3820:     ecode++;
                   3821:     goto REPEATTYPE;
                   3822: 
                   3823:     case OP_TYPEPOSPLUS:
                   3824:     possessive = TRUE;
                   3825:     min = 1;
                   3826:     max = INT_MAX;
                   3827:     ecode++;
                   3828:     goto REPEATTYPE;
                   3829: 
                   3830:     case OP_TYPEPOSQUERY:
                   3831:     possessive = TRUE;
                   3832:     min = 0;
                   3833:     max = 1;
                   3834:     ecode++;
                   3835:     goto REPEATTYPE;
                   3836: 
                   3837:     case OP_TYPEPOSUPTO:
                   3838:     possessive = TRUE;
                   3839:     min = 0;
                   3840:     max = GET2(ecode, 1);
                   3841:     ecode += 3;
                   3842:     goto REPEATTYPE;
                   3843: 
                   3844:     case OP_TYPESTAR:
                   3845:     case OP_TYPEMINSTAR:
                   3846:     case OP_TYPEPLUS:
                   3847:     case OP_TYPEMINPLUS:
                   3848:     case OP_TYPEQUERY:
                   3849:     case OP_TYPEMINQUERY:
                   3850:     c = *ecode++ - OP_TYPESTAR;
                   3851:     minimize = (c & 1) != 0;
                   3852:     min = rep_min[c];                 /* Pick up values from tables; */
                   3853:     max = rep_max[c];                 /* zero for max => infinity */
                   3854:     if (max == 0) max = INT_MAX;
                   3855: 
                   3856:     /* Common code for all repeated single character type matches. Note that
                   3857:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   3858:     character types, the valid characters are all one-byte long. */
                   3859: 
                   3860:     REPEATTYPE:
                   3861:     ctype = *ecode++;      /* Code for the character type */
                   3862: 
                   3863: #ifdef SUPPORT_UCP
                   3864:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   3865:       {
                   3866:       prop_fail_result = ctype == OP_NOTPROP;
                   3867:       prop_type = *ecode++;
                   3868:       prop_value = *ecode++;
                   3869:       }
                   3870:     else prop_type = -1;
                   3871: #endif
                   3872: 
                   3873:     /* First, ensure the minimum number of matches are present. Use inline
                   3874:     code for maximizing the speed, and do the type test once at the start
                   3875:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
                   3876:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   3877:     and single-bytes. */
                   3878: 
                   3879:     if (min > 0)
                   3880:       {
                   3881: #ifdef SUPPORT_UCP
                   3882:       if (prop_type >= 0)
                   3883:         {
                   3884:         switch(prop_type)
                   3885:           {
                   3886:           case PT_ANY:
                   3887:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   3888:           for (i = 1; i <= min; i++)
                   3889:             {
                   3890:             if (eptr >= md->end_subject)
                   3891:               {
                   3892:               SCHECK_PARTIAL();
                   3893:               RRETURN(MATCH_NOMATCH);
                   3894:               }
                   3895:             GETCHARINCTEST(c, eptr);
                   3896:             }
                   3897:           break;
                   3898: 
                   3899:           case PT_LAMP:
                   3900:           for (i = 1; i <= min; i++)
                   3901:             {
                   3902:             int chartype;
                   3903:             if (eptr >= md->end_subject)
                   3904:               {
                   3905:               SCHECK_PARTIAL();
                   3906:               RRETURN(MATCH_NOMATCH);
                   3907:               }
                   3908:             GETCHARINCTEST(c, eptr);
                   3909:             chartype = UCD_CHARTYPE(c);
                   3910:             if ((chartype == ucp_Lu ||
                   3911:                  chartype == ucp_Ll ||
                   3912:                  chartype == ucp_Lt) == prop_fail_result)
                   3913:               RRETURN(MATCH_NOMATCH);
                   3914:             }
                   3915:           break;
                   3916: 
                   3917:           case PT_GC:
                   3918:           for (i = 1; i <= min; i++)
                   3919:             {
                   3920:             if (eptr >= md->end_subject)
                   3921:               {
                   3922:               SCHECK_PARTIAL();
                   3923:               RRETURN(MATCH_NOMATCH);
                   3924:               }
                   3925:             GETCHARINCTEST(c, eptr);
                   3926:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   3927:               RRETURN(MATCH_NOMATCH);
                   3928:             }
                   3929:           break;
                   3930: 
                   3931:           case PT_PC:
                   3932:           for (i = 1; i <= min; i++)
                   3933:             {
                   3934:             if (eptr >= md->end_subject)
                   3935:               {
                   3936:               SCHECK_PARTIAL();
                   3937:               RRETURN(MATCH_NOMATCH);
                   3938:               }
                   3939:             GETCHARINCTEST(c, eptr);
                   3940:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   3941:               RRETURN(MATCH_NOMATCH);
                   3942:             }
                   3943:           break;
                   3944: 
                   3945:           case PT_SC:
                   3946:           for (i = 1; i <= min; i++)
                   3947:             {
                   3948:             if (eptr >= md->end_subject)
                   3949:               {
                   3950:               SCHECK_PARTIAL();
                   3951:               RRETURN(MATCH_NOMATCH);
                   3952:               }
                   3953:             GETCHARINCTEST(c, eptr);
                   3954:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   3955:               RRETURN(MATCH_NOMATCH);
                   3956:             }
                   3957:           break;
                   3958: 
                   3959:           case PT_ALNUM:
                   3960:           for (i = 1; i <= min; i++)
                   3961:             {
                   3962:             int category;
                   3963:             if (eptr >= md->end_subject)
                   3964:               {
                   3965:               SCHECK_PARTIAL();
                   3966:               RRETURN(MATCH_NOMATCH);
                   3967:               }
                   3968:             GETCHARINCTEST(c, eptr);
                   3969:             category = UCD_CATEGORY(c);
                   3970:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   3971:               RRETURN(MATCH_NOMATCH);
                   3972:             }
                   3973:           break;
                   3974: 
                   3975:           case PT_SPACE:    /* Perl space */
                   3976:           for (i = 1; i <= min; i++)
                   3977:             {
                   3978:             if (eptr >= md->end_subject)
                   3979:               {
                   3980:               SCHECK_PARTIAL();
                   3981:               RRETURN(MATCH_NOMATCH);
                   3982:               }
                   3983:             GETCHARINCTEST(c, eptr);
                   3984:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   3985:                  c == CHAR_FF || c == CHAR_CR)
                   3986:                    == prop_fail_result)
                   3987:               RRETURN(MATCH_NOMATCH);
                   3988:             }
                   3989:           break;
                   3990: 
                   3991:           case PT_PXSPACE:  /* POSIX space */
                   3992:           for (i = 1; i <= min; i++)
                   3993:             {
                   3994:             if (eptr >= md->end_subject)
                   3995:               {
                   3996:               SCHECK_PARTIAL();
                   3997:               RRETURN(MATCH_NOMATCH);
                   3998:               }
                   3999:             GETCHARINCTEST(c, eptr);
                   4000:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4001:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4002:                    == prop_fail_result)
                   4003:               RRETURN(MATCH_NOMATCH);
                   4004:             }
                   4005:           break;
                   4006: 
                   4007:           case PT_WORD:
                   4008:           for (i = 1; i <= min; i++)
                   4009:             {
                   4010:             int category;
                   4011:             if (eptr >= md->end_subject)
                   4012:               {
                   4013:               SCHECK_PARTIAL();
                   4014:               RRETURN(MATCH_NOMATCH);
                   4015:               }
                   4016:             GETCHARINCTEST(c, eptr);
                   4017:             category = UCD_CATEGORY(c);
                   4018:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                   4019:                    == prop_fail_result)
                   4020:               RRETURN(MATCH_NOMATCH);
                   4021:             }
                   4022:           break;
                   4023: 
                   4024:           /* This should not occur */
                   4025: 
                   4026:           default:
                   4027:           RRETURN(PCRE_ERROR_INTERNAL);
                   4028:           }
                   4029:         }
                   4030: 
                   4031:       /* Match extended Unicode sequences. We will get here only if the
                   4032:       support is in the binary; otherwise a compile-time error occurs. */
                   4033: 
                   4034:       else if (ctype == OP_EXTUNI)
                   4035:         {
                   4036:         for (i = 1; i <= min; i++)
                   4037:           {
                   4038:           if (eptr >= md->end_subject)
                   4039:             {
                   4040:             SCHECK_PARTIAL();
                   4041:             RRETURN(MATCH_NOMATCH);
                   4042:             }
                   4043:           GETCHARINCTEST(c, eptr);
                   4044:           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
                   4045:           while (eptr < md->end_subject)
                   4046:             {
                   4047:             int len = 1;
                   4048:             if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   4049:             if (UCD_CATEGORY(c) != ucp_M) break;
                   4050:             eptr += len;
                   4051:             }
                   4052:           }
                   4053:         }
                   4054: 
                   4055:       else
                   4056: #endif     /* SUPPORT_UCP */
                   4057: 
                   4058: /* Handle all other cases when the coding is UTF-8 */
                   4059: 
                   4060: #ifdef SUPPORT_UTF8
                   4061:       if (utf8) switch(ctype)
                   4062:         {
                   4063:         case OP_ANY:
                   4064:         for (i = 1; i <= min; i++)
                   4065:           {
                   4066:           if (eptr >= md->end_subject)
                   4067:             {
                   4068:             SCHECK_PARTIAL();
                   4069:             RRETURN(MATCH_NOMATCH);
                   4070:             }
                   4071:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   4072:           eptr++;
                   4073:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   4074:           }
                   4075:         break;
                   4076: 
                   4077:         case OP_ALLANY:
                   4078:         for (i = 1; i <= min; i++)
                   4079:           {
                   4080:           if (eptr >= md->end_subject)
                   4081:             {
                   4082:             SCHECK_PARTIAL();
                   4083:             RRETURN(MATCH_NOMATCH);
                   4084:             }
                   4085:           eptr++;
                   4086:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   4087:           }
                   4088:         break;
                   4089: 
                   4090:         case OP_ANYBYTE:
                   4091:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
                   4092:         eptr += min;
                   4093:         break;
                   4094: 
                   4095:         case OP_ANYNL:
                   4096:         for (i = 1; i <= min; i++)
                   4097:           {
                   4098:           if (eptr >= md->end_subject)
                   4099:             {
                   4100:             SCHECK_PARTIAL();
                   4101:             RRETURN(MATCH_NOMATCH);
                   4102:             }
                   4103:           GETCHARINC(c, eptr);
                   4104:           switch(c)
                   4105:             {
                   4106:             default: RRETURN(MATCH_NOMATCH);
                   4107: 
                   4108:             case 0x000d:
                   4109:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4110:             break;
                   4111: 
                   4112:             case 0x000a:
                   4113:             break;
                   4114: 
                   4115:             case 0x000b:
                   4116:             case 0x000c:
                   4117:             case 0x0085:
                   4118:             case 0x2028:
                   4119:             case 0x2029:
                   4120:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4121:             break;
                   4122:             }
                   4123:           }
                   4124:         break;
                   4125: 
                   4126:         case OP_NOT_HSPACE:
                   4127:         for (i = 1; i <= min; i++)
                   4128:           {
                   4129:           if (eptr >= md->end_subject)
                   4130:             {
                   4131:             SCHECK_PARTIAL();
                   4132:             RRETURN(MATCH_NOMATCH);
                   4133:             }
                   4134:           GETCHARINC(c, eptr);
                   4135:           switch(c)
                   4136:             {
                   4137:             default: break;
                   4138:             case 0x09:      /* HT */
                   4139:             case 0x20:      /* SPACE */
                   4140:             case 0xa0:      /* NBSP */
                   4141:             case 0x1680:    /* OGHAM SPACE MARK */
                   4142:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4143:             case 0x2000:    /* EN QUAD */
                   4144:             case 0x2001:    /* EM QUAD */
                   4145:             case 0x2002:    /* EN SPACE */
                   4146:             case 0x2003:    /* EM SPACE */
                   4147:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4148:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4149:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4150:             case 0x2007:    /* FIGURE SPACE */
                   4151:             case 0x2008:    /* PUNCTUATION SPACE */
                   4152:             case 0x2009:    /* THIN SPACE */
                   4153:             case 0x200A:    /* HAIR SPACE */
                   4154:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4155:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4156:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4157:             RRETURN(MATCH_NOMATCH);
                   4158:             }
                   4159:           }
                   4160:         break;
                   4161: 
                   4162:         case OP_HSPACE:
                   4163:         for (i = 1; i <= min; i++)
                   4164:           {
                   4165:           if (eptr >= md->end_subject)
                   4166:             {
                   4167:             SCHECK_PARTIAL();
                   4168:             RRETURN(MATCH_NOMATCH);
                   4169:             }
                   4170:           GETCHARINC(c, eptr);
                   4171:           switch(c)
                   4172:             {
                   4173:             default: RRETURN(MATCH_NOMATCH);
                   4174:             case 0x09:      /* HT */
                   4175:             case 0x20:      /* SPACE */
                   4176:             case 0xa0:      /* NBSP */
                   4177:             case 0x1680:    /* OGHAM SPACE MARK */
                   4178:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4179:             case 0x2000:    /* EN QUAD */
                   4180:             case 0x2001:    /* EM QUAD */
                   4181:             case 0x2002:    /* EN SPACE */
                   4182:             case 0x2003:    /* EM SPACE */
                   4183:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4184:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4185:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4186:             case 0x2007:    /* FIGURE SPACE */
                   4187:             case 0x2008:    /* PUNCTUATION SPACE */
                   4188:             case 0x2009:    /* THIN SPACE */
                   4189:             case 0x200A:    /* HAIR SPACE */
                   4190:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4191:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4192:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4193:             break;
                   4194:             }
                   4195:           }
                   4196:         break;
                   4197: 
                   4198:         case OP_NOT_VSPACE:
                   4199:         for (i = 1; i <= min; i++)
                   4200:           {
                   4201:           if (eptr >= md->end_subject)
                   4202:             {
                   4203:             SCHECK_PARTIAL();
                   4204:             RRETURN(MATCH_NOMATCH);
                   4205:             }
                   4206:           GETCHARINC(c, eptr);
                   4207:           switch(c)
                   4208:             {
                   4209:             default: break;
                   4210:             case 0x0a:      /* LF */
                   4211:             case 0x0b:      /* VT */
                   4212:             case 0x0c:      /* FF */
                   4213:             case 0x0d:      /* CR */
                   4214:             case 0x85:      /* NEL */
                   4215:             case 0x2028:    /* LINE SEPARATOR */
                   4216:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4217:             RRETURN(MATCH_NOMATCH);
                   4218:             }
                   4219:           }
                   4220:         break;
                   4221: 
                   4222:         case OP_VSPACE:
                   4223:         for (i = 1; i <= min; i++)
                   4224:           {
                   4225:           if (eptr >= md->end_subject)
                   4226:             {
                   4227:             SCHECK_PARTIAL();
                   4228:             RRETURN(MATCH_NOMATCH);
                   4229:             }
                   4230:           GETCHARINC(c, eptr);
                   4231:           switch(c)
                   4232:             {
                   4233:             default: RRETURN(MATCH_NOMATCH);
                   4234:             case 0x0a:      /* LF */
                   4235:             case 0x0b:      /* VT */
                   4236:             case 0x0c:      /* FF */
                   4237:             case 0x0d:      /* CR */
                   4238:             case 0x85:      /* NEL */
                   4239:             case 0x2028:    /* LINE SEPARATOR */
                   4240:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4241:             break;
                   4242:             }
                   4243:           }
                   4244:         break;
                   4245: 
                   4246:         case OP_NOT_DIGIT:
                   4247:         for (i = 1; i <= min; i++)
                   4248:           {
                   4249:           if (eptr >= md->end_subject)
                   4250:             {
                   4251:             SCHECK_PARTIAL();
                   4252:             RRETURN(MATCH_NOMATCH);
                   4253:             }
                   4254:           GETCHARINC(c, eptr);
                   4255:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
                   4256:             RRETURN(MATCH_NOMATCH);
                   4257:           }
                   4258:         break;
                   4259: 
                   4260:         case OP_DIGIT:
                   4261:         for (i = 1; i <= min; i++)
                   4262:           {
                   4263:           if (eptr >= md->end_subject)
                   4264:             {
                   4265:             SCHECK_PARTIAL();
                   4266:             RRETURN(MATCH_NOMATCH);
                   4267:             }
                   4268:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
                   4269:             RRETURN(MATCH_NOMATCH);
                   4270:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4271:           }
                   4272:         break;
                   4273: 
                   4274:         case OP_NOT_WHITESPACE:
                   4275:         for (i = 1; i <= min; i++)
                   4276:           {
                   4277:           if (eptr >= md->end_subject)
                   4278:             {
                   4279:             SCHECK_PARTIAL();
                   4280:             RRETURN(MATCH_NOMATCH);
                   4281:             }
                   4282:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
                   4283:             RRETURN(MATCH_NOMATCH);
                   4284:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   4285:           }
                   4286:         break;
                   4287: 
                   4288:         case OP_WHITESPACE:
                   4289:         for (i = 1; i <= min; i++)
                   4290:           {
                   4291:           if (eptr >= md->end_subject)
                   4292:             {
                   4293:             SCHECK_PARTIAL();
                   4294:             RRETURN(MATCH_NOMATCH);
                   4295:             }
                   4296:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
                   4297:             RRETURN(MATCH_NOMATCH);
                   4298:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4299:           }
                   4300:         break;
                   4301: 
                   4302:         case OP_NOT_WORDCHAR:
                   4303:         for (i = 1; i <= min; i++)
                   4304:           {
                   4305:           if (eptr >= md->end_subject)
                   4306:             {
                   4307:             SCHECK_PARTIAL();
                   4308:             RRETURN(MATCH_NOMATCH);
                   4309:             }
                   4310:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
                   4311:             RRETURN(MATCH_NOMATCH);
                   4312:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   4313:           }
                   4314:         break;
                   4315: 
                   4316:         case OP_WORDCHAR:
                   4317:         for (i = 1; i <= min; i++)
                   4318:           {
                   4319:           if (eptr >= md->end_subject)
                   4320:             {
                   4321:             SCHECK_PARTIAL();
                   4322:             RRETURN(MATCH_NOMATCH);
                   4323:             }
                   4324:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
                   4325:             RRETURN(MATCH_NOMATCH);
                   4326:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4327:           }
                   4328:         break;
                   4329: 
                   4330:         default:
                   4331:         RRETURN(PCRE_ERROR_INTERNAL);
                   4332:         }  /* End switch(ctype) */
                   4333: 
                   4334:       else
                   4335: #endif     /* SUPPORT_UTF8 */
                   4336: 
                   4337:       /* Code for the non-UTF-8 case for minimum matching of operators other
                   4338:       than OP_PROP and OP_NOTPROP. */
                   4339: 
                   4340:       switch(ctype)
                   4341:         {
                   4342:         case OP_ANY:
                   4343:         for (i = 1; i <= min; i++)
                   4344:           {
                   4345:           if (eptr >= md->end_subject)
                   4346:             {
                   4347:             SCHECK_PARTIAL();
                   4348:             RRETURN(MATCH_NOMATCH);
                   4349:             }
                   4350:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   4351:           eptr++;
                   4352:           }
                   4353:         break;
                   4354: 
                   4355:         case OP_ALLANY:
                   4356:         if (eptr > md->end_subject - min)
                   4357:           {
                   4358:           SCHECK_PARTIAL();
                   4359:           RRETURN(MATCH_NOMATCH);
                   4360:           }
                   4361:         eptr += min;
                   4362:         break;
                   4363: 
                   4364:         case OP_ANYBYTE:
                   4365:         if (eptr > md->end_subject - min)
                   4366:           {
                   4367:           SCHECK_PARTIAL();
                   4368:           RRETURN(MATCH_NOMATCH);
                   4369:           }
                   4370:         eptr += min;
                   4371:         break;
                   4372: 
                   4373:         case OP_ANYNL:
                   4374:         for (i = 1; i <= min; i++)
                   4375:           {
                   4376:           if (eptr >= md->end_subject)
                   4377:             {
                   4378:             SCHECK_PARTIAL();
                   4379:             RRETURN(MATCH_NOMATCH);
                   4380:             }
                   4381:           switch(*eptr++)
                   4382:             {
                   4383:             default: RRETURN(MATCH_NOMATCH);
                   4384: 
                   4385:             case 0x000d:
                   4386:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4387:             break;
                   4388: 
                   4389:             case 0x000a:
                   4390:             break;
                   4391: 
                   4392:             case 0x000b:
                   4393:             case 0x000c:
                   4394:             case 0x0085:
                   4395:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4396:             break;
                   4397:             }
                   4398:           }
                   4399:         break;
                   4400: 
                   4401:         case OP_NOT_HSPACE:
                   4402:         for (i = 1; i <= min; i++)
                   4403:           {
                   4404:           if (eptr >= md->end_subject)
                   4405:             {
                   4406:             SCHECK_PARTIAL();
                   4407:             RRETURN(MATCH_NOMATCH);
                   4408:             }
                   4409:           switch(*eptr++)
                   4410:             {
                   4411:             default: break;
                   4412:             case 0x09:      /* HT */
                   4413:             case 0x20:      /* SPACE */
                   4414:             case 0xa0:      /* NBSP */
                   4415:             RRETURN(MATCH_NOMATCH);
                   4416:             }
                   4417:           }
                   4418:         break;
                   4419: 
                   4420:         case OP_HSPACE:
                   4421:         for (i = 1; i <= min; i++)
                   4422:           {
                   4423:           if (eptr >= md->end_subject)
                   4424:             {
                   4425:             SCHECK_PARTIAL();
                   4426:             RRETURN(MATCH_NOMATCH);
                   4427:             }
                   4428:           switch(*eptr++)
                   4429:             {
                   4430:             default: RRETURN(MATCH_NOMATCH);
                   4431:             case 0x09:      /* HT */
                   4432:             case 0x20:      /* SPACE */
                   4433:             case 0xa0:      /* NBSP */
                   4434:             break;
                   4435:             }
                   4436:           }
                   4437:         break;
                   4438: 
                   4439:         case OP_NOT_VSPACE:
                   4440:         for (i = 1; i <= min; i++)
                   4441:           {
                   4442:           if (eptr >= md->end_subject)
                   4443:             {
                   4444:             SCHECK_PARTIAL();
                   4445:             RRETURN(MATCH_NOMATCH);
                   4446:             }
                   4447:           switch(*eptr++)
                   4448:             {
                   4449:             default: break;
                   4450:             case 0x0a:      /* LF */
                   4451:             case 0x0b:      /* VT */
                   4452:             case 0x0c:      /* FF */
                   4453:             case 0x0d:      /* CR */
                   4454:             case 0x85:      /* NEL */
                   4455:             RRETURN(MATCH_NOMATCH);
                   4456:             }
                   4457:           }
                   4458:         break;
                   4459: 
                   4460:         case OP_VSPACE:
                   4461:         for (i = 1; i <= min; i++)
                   4462:           {
                   4463:           if (eptr >= md->end_subject)
                   4464:             {
                   4465:             SCHECK_PARTIAL();
                   4466:             RRETURN(MATCH_NOMATCH);
                   4467:             }
                   4468:           switch(*eptr++)
                   4469:             {
                   4470:             default: RRETURN(MATCH_NOMATCH);
                   4471:             case 0x0a:      /* LF */
                   4472:             case 0x0b:      /* VT */
                   4473:             case 0x0c:      /* FF */
                   4474:             case 0x0d:      /* CR */
                   4475:             case 0x85:      /* NEL */
                   4476:             break;
                   4477:             }
                   4478:           }
                   4479:         break;
                   4480: 
                   4481:         case OP_NOT_DIGIT:
                   4482:         for (i = 1; i <= min; i++)
                   4483:           {
                   4484:           if (eptr >= md->end_subject)
                   4485:             {
                   4486:             SCHECK_PARTIAL();
                   4487:             RRETURN(MATCH_NOMATCH);
                   4488:             }
                   4489:           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
                   4490:           }
                   4491:         break;
                   4492: 
                   4493:         case OP_DIGIT:
                   4494:         for (i = 1; i <= min; i++)
                   4495:           {
                   4496:           if (eptr >= md->end_subject)
                   4497:             {
                   4498:             SCHECK_PARTIAL();
                   4499:             RRETURN(MATCH_NOMATCH);
                   4500:             }
                   4501:           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
                   4502:           }
                   4503:         break;
                   4504: 
                   4505:         case OP_NOT_WHITESPACE:
                   4506:         for (i = 1; i <= min; i++)
                   4507:           {
                   4508:           if (eptr >= md->end_subject)
                   4509:             {
                   4510:             SCHECK_PARTIAL();
                   4511:             RRETURN(MATCH_NOMATCH);
                   4512:             }
                   4513:           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
                   4514:           }
                   4515:         break;
                   4516: 
                   4517:         case OP_WHITESPACE:
                   4518:         for (i = 1; i <= min; i++)
                   4519:           {
                   4520:           if (eptr >= md->end_subject)
                   4521:             {
                   4522:             SCHECK_PARTIAL();
                   4523:             RRETURN(MATCH_NOMATCH);
                   4524:             }
                   4525:           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
                   4526:           }
                   4527:         break;
                   4528: 
                   4529:         case OP_NOT_WORDCHAR:
                   4530:         for (i = 1; i <= min; i++)
                   4531:           {
                   4532:           if (eptr >= md->end_subject)
                   4533:             {
                   4534:             SCHECK_PARTIAL();
                   4535:             RRETURN(MATCH_NOMATCH);
                   4536:             }
                   4537:           if ((md->ctypes[*eptr++] & ctype_word) != 0)
                   4538:             RRETURN(MATCH_NOMATCH);
                   4539:           }
                   4540:         break;
                   4541: 
                   4542:         case OP_WORDCHAR:
                   4543:         for (i = 1; i <= min; i++)
                   4544:           {
                   4545:           if (eptr >= md->end_subject)
                   4546:             {
                   4547:             SCHECK_PARTIAL();
                   4548:             RRETURN(MATCH_NOMATCH);
                   4549:             }
                   4550:           if ((md->ctypes[*eptr++] & ctype_word) == 0)
                   4551:             RRETURN(MATCH_NOMATCH);
                   4552:           }
                   4553:         break;
                   4554: 
                   4555:         default:
                   4556:         RRETURN(PCRE_ERROR_INTERNAL);
                   4557:         }
                   4558:       }
                   4559: 
                   4560:     /* If min = max, continue at the same level without recursing */
                   4561: 
                   4562:     if (min == max) continue;
                   4563: 
                   4564:     /* If minimizing, we have to test the rest of the pattern before each
                   4565:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   4566:     separate the UCP cases. */
                   4567: 
                   4568:     if (minimize)
                   4569:       {
                   4570: #ifdef SUPPORT_UCP
                   4571:       if (prop_type >= 0)
                   4572:         {
                   4573:         switch(prop_type)
                   4574:           {
                   4575:           case PT_ANY:
                   4576:           for (fi = min;; fi++)
                   4577:             {
                   4578:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
                   4579:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4580:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4581:             if (eptr >= md->end_subject)
                   4582:               {
                   4583:               SCHECK_PARTIAL();
                   4584:               RRETURN(MATCH_NOMATCH);
                   4585:               }
                   4586:             GETCHARINCTEST(c, eptr);
                   4587:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   4588:             }
                   4589:           /* Control never gets here */
                   4590: 
                   4591:           case PT_LAMP:
                   4592:           for (fi = min;; fi++)
                   4593:             {
                   4594:             int chartype;
                   4595:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
                   4596:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4597:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4598:             if (eptr >= md->end_subject)
                   4599:               {
                   4600:               SCHECK_PARTIAL();
                   4601:               RRETURN(MATCH_NOMATCH);
                   4602:               }
                   4603:             GETCHARINCTEST(c, eptr);
                   4604:             chartype = UCD_CHARTYPE(c);
                   4605:             if ((chartype == ucp_Lu ||
                   4606:                  chartype == ucp_Ll ||
                   4607:                  chartype == ucp_Lt) == prop_fail_result)
                   4608:               RRETURN(MATCH_NOMATCH);
                   4609:             }
                   4610:           /* Control never gets here */
                   4611: 
                   4612:           case PT_GC:
                   4613:           for (fi = min;; fi++)
                   4614:             {
                   4615:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
                   4616:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4617:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4618:             if (eptr >= md->end_subject)
                   4619:               {
                   4620:               SCHECK_PARTIAL();
                   4621:               RRETURN(MATCH_NOMATCH);
                   4622:               }
                   4623:             GETCHARINCTEST(c, eptr);
                   4624:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4625:               RRETURN(MATCH_NOMATCH);
                   4626:             }
                   4627:           /* Control never gets here */
                   4628: 
                   4629:           case PT_PC:
                   4630:           for (fi = min;; fi++)
                   4631:             {
                   4632:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
                   4633:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4634:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4635:             if (eptr >= md->end_subject)
                   4636:               {
                   4637:               SCHECK_PARTIAL();
                   4638:               RRETURN(MATCH_NOMATCH);
                   4639:               }
                   4640:             GETCHARINCTEST(c, eptr);
                   4641:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4642:               RRETURN(MATCH_NOMATCH);
                   4643:             }
                   4644:           /* Control never gets here */
                   4645: 
                   4646:           case PT_SC:
                   4647:           for (fi = min;; fi++)
                   4648:             {
                   4649:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
                   4650:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4651:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4652:             if (eptr >= md->end_subject)
                   4653:               {
                   4654:               SCHECK_PARTIAL();
                   4655:               RRETURN(MATCH_NOMATCH);
                   4656:               }
                   4657:             GETCHARINCTEST(c, eptr);
                   4658:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4659:               RRETURN(MATCH_NOMATCH);
                   4660:             }
                   4661:           /* Control never gets here */
                   4662: 
                   4663:           case PT_ALNUM:
                   4664:           for (fi = min;; fi++)
                   4665:             {
                   4666:             int category;
                   4667:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
                   4668:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4669:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4670:             if (eptr >= md->end_subject)
                   4671:               {
                   4672:               SCHECK_PARTIAL();
                   4673:               RRETURN(MATCH_NOMATCH);
                   4674:               }
                   4675:             GETCHARINCTEST(c, eptr);
                   4676:             category = UCD_CATEGORY(c);
                   4677:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   4678:               RRETURN(MATCH_NOMATCH);
                   4679:             }
                   4680:           /* Control never gets here */
                   4681: 
                   4682:           case PT_SPACE:    /* Perl space */
                   4683:           for (fi = min;; fi++)
                   4684:             {
                   4685:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
                   4686:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4687:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4688:             if (eptr >= md->end_subject)
                   4689:               {
                   4690:               SCHECK_PARTIAL();
                   4691:               RRETURN(MATCH_NOMATCH);
                   4692:               }
                   4693:             GETCHARINCTEST(c, eptr);
                   4694:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4695:                  c == CHAR_FF || c == CHAR_CR)
                   4696:                    == prop_fail_result)
                   4697:               RRETURN(MATCH_NOMATCH);
                   4698:             }
                   4699:           /* Control never gets here */
                   4700: 
                   4701:           case PT_PXSPACE:  /* POSIX space */
                   4702:           for (fi = min;; fi++)
                   4703:             {
                   4704:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
                   4705:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4706:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4707:             if (eptr >= md->end_subject)
                   4708:               {
                   4709:               SCHECK_PARTIAL();
                   4710:               RRETURN(MATCH_NOMATCH);
                   4711:               }
                   4712:             GETCHARINCTEST(c, eptr);
                   4713:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4714:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4715:                    == prop_fail_result)
                   4716:               RRETURN(MATCH_NOMATCH);
                   4717:             }
                   4718:           /* Control never gets here */
                   4719: 
                   4720:           case PT_WORD:
                   4721:           for (fi = min;; fi++)
                   4722:             {
                   4723:             int category;
                   4724:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
                   4725:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4726:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4727:             if (eptr >= md->end_subject)
                   4728:               {
                   4729:               SCHECK_PARTIAL();
                   4730:               RRETURN(MATCH_NOMATCH);
                   4731:               }
                   4732:             GETCHARINCTEST(c, eptr);
                   4733:             category = UCD_CATEGORY(c);
                   4734:             if ((category == ucp_L ||
                   4735:                  category == ucp_N ||
                   4736:                  c == CHAR_UNDERSCORE)
                   4737:                    == prop_fail_result)
                   4738:               RRETURN(MATCH_NOMATCH);
                   4739:             }
                   4740:           /* Control never gets here */
                   4741: 
                   4742:           /* This should never occur */
                   4743: 
                   4744:           default:
                   4745:           RRETURN(PCRE_ERROR_INTERNAL);
                   4746:           }
                   4747:         }
                   4748: 
                   4749:       /* Match extended Unicode sequences. We will get here only if the
                   4750:       support is in the binary; otherwise a compile-time error occurs. */
                   4751: 
                   4752:       else if (ctype == OP_EXTUNI)
                   4753:         {
                   4754:         for (fi = min;; fi++)
                   4755:           {
                   4756:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
                   4757:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4758:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4759:           if (eptr >= md->end_subject)
                   4760:             {
                   4761:             SCHECK_PARTIAL();
                   4762:             RRETURN(MATCH_NOMATCH);
                   4763:             }
                   4764:           GETCHARINCTEST(c, eptr);
                   4765:           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
                   4766:           while (eptr < md->end_subject)
                   4767:             {
                   4768:             int len = 1;
                   4769:             if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   4770:             if (UCD_CATEGORY(c) != ucp_M) break;
                   4771:             eptr += len;
                   4772:             }
                   4773:           }
                   4774:         }
                   4775:       else
                   4776: #endif     /* SUPPORT_UCP */
                   4777: 
                   4778: #ifdef SUPPORT_UTF8
                   4779:       /* UTF-8 mode */
                   4780:       if (utf8)
                   4781:         {
                   4782:         for (fi = min;; fi++)
                   4783:           {
                   4784:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
                   4785:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4786:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4787:           if (eptr >= md->end_subject)
                   4788:             {
                   4789:             SCHECK_PARTIAL();
                   4790:             RRETURN(MATCH_NOMATCH);
                   4791:             }
                   4792:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   4793:             RRETURN(MATCH_NOMATCH);
                   4794:           GETCHARINC(c, eptr);
                   4795:           switch(ctype)
                   4796:             {
                   4797:             case OP_ANY:        /* This is the non-NL case */
                   4798:             case OP_ALLANY:
                   4799:             case OP_ANYBYTE:
                   4800:             break;
                   4801: 
                   4802:             case OP_ANYNL:
                   4803:             switch(c)
                   4804:               {
                   4805:               default: RRETURN(MATCH_NOMATCH);
                   4806:               case 0x000d:
                   4807:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4808:               break;
                   4809:               case 0x000a:
                   4810:               break;
                   4811: 
                   4812:               case 0x000b:
                   4813:               case 0x000c:
                   4814:               case 0x0085:
                   4815:               case 0x2028:
                   4816:               case 0x2029:
                   4817:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4818:               break;
                   4819:               }
                   4820:             break;
                   4821: 
                   4822:             case OP_NOT_HSPACE:
                   4823:             switch(c)
                   4824:               {
                   4825:               default: break;
                   4826:               case 0x09:      /* HT */
                   4827:               case 0x20:      /* SPACE */
                   4828:               case 0xa0:      /* NBSP */
                   4829:               case 0x1680:    /* OGHAM SPACE MARK */
                   4830:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4831:               case 0x2000:    /* EN QUAD */
                   4832:               case 0x2001:    /* EM QUAD */
                   4833:               case 0x2002:    /* EN SPACE */
                   4834:               case 0x2003:    /* EM SPACE */
                   4835:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4836:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4837:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4838:               case 0x2007:    /* FIGURE SPACE */
                   4839:               case 0x2008:    /* PUNCTUATION SPACE */
                   4840:               case 0x2009:    /* THIN SPACE */
                   4841:               case 0x200A:    /* HAIR SPACE */
                   4842:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4843:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4844:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4845:               RRETURN(MATCH_NOMATCH);
                   4846:               }
                   4847:             break;
                   4848: 
                   4849:             case OP_HSPACE:
                   4850:             switch(c)
                   4851:               {
                   4852:               default: RRETURN(MATCH_NOMATCH);
                   4853:               case 0x09:      /* HT */
                   4854:               case 0x20:      /* SPACE */
                   4855:               case 0xa0:      /* NBSP */
                   4856:               case 0x1680:    /* OGHAM SPACE MARK */
                   4857:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4858:               case 0x2000:    /* EN QUAD */
                   4859:               case 0x2001:    /* EM QUAD */
                   4860:               case 0x2002:    /* EN SPACE */
                   4861:               case 0x2003:    /* EM SPACE */
                   4862:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4863:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4864:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4865:               case 0x2007:    /* FIGURE SPACE */
                   4866:               case 0x2008:    /* PUNCTUATION SPACE */
                   4867:               case 0x2009:    /* THIN SPACE */
                   4868:               case 0x200A:    /* HAIR SPACE */
                   4869:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4870:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4871:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4872:               break;
                   4873:               }
                   4874:             break;
                   4875: 
                   4876:             case OP_NOT_VSPACE:
                   4877:             switch(c)
                   4878:               {
                   4879:               default: break;
                   4880:               case 0x0a:      /* LF */
                   4881:               case 0x0b:      /* VT */
                   4882:               case 0x0c:      /* FF */
                   4883:               case 0x0d:      /* CR */
                   4884:               case 0x85:      /* NEL */
                   4885:               case 0x2028:    /* LINE SEPARATOR */
                   4886:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4887:               RRETURN(MATCH_NOMATCH);
                   4888:               }
                   4889:             break;
                   4890: 
                   4891:             case OP_VSPACE:
                   4892:             switch(c)
                   4893:               {
                   4894:               default: RRETURN(MATCH_NOMATCH);
                   4895:               case 0x0a:      /* LF */
                   4896:               case 0x0b:      /* VT */
                   4897:               case 0x0c:      /* FF */
                   4898:               case 0x0d:      /* CR */
                   4899:               case 0x85:      /* NEL */
                   4900:               case 0x2028:    /* LINE SEPARATOR */
                   4901:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4902:               break;
                   4903:               }
                   4904:             break;
                   4905: 
                   4906:             case OP_NOT_DIGIT:
                   4907:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
                   4908:               RRETURN(MATCH_NOMATCH);
                   4909:             break;
                   4910: 
                   4911:             case OP_DIGIT:
                   4912:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
                   4913:               RRETURN(MATCH_NOMATCH);
                   4914:             break;
                   4915: 
                   4916:             case OP_NOT_WHITESPACE:
                   4917:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
                   4918:               RRETURN(MATCH_NOMATCH);
                   4919:             break;
                   4920: 
                   4921:             case OP_WHITESPACE:
                   4922:             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
                   4923:               RRETURN(MATCH_NOMATCH);
                   4924:             break;
                   4925: 
                   4926:             case OP_NOT_WORDCHAR:
                   4927:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
                   4928:               RRETURN(MATCH_NOMATCH);
                   4929:             break;
                   4930: 
                   4931:             case OP_WORDCHAR:
                   4932:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
                   4933:               RRETURN(MATCH_NOMATCH);
                   4934:             break;
                   4935: 
                   4936:             default:
                   4937:             RRETURN(PCRE_ERROR_INTERNAL);
                   4938:             }
                   4939:           }
                   4940:         }
                   4941:       else
                   4942: #endif
                   4943:       /* Not UTF-8 mode */
                   4944:         {
                   4945:         for (fi = min;; fi++)
                   4946:           {
                   4947:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
                   4948:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4949:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4950:           if (eptr >= md->end_subject)
                   4951:             {
                   4952:             SCHECK_PARTIAL();
                   4953:             RRETURN(MATCH_NOMATCH);
                   4954:             }
                   4955:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   4956:             RRETURN(MATCH_NOMATCH);
                   4957:           c = *eptr++;
                   4958:           switch(ctype)
                   4959:             {
                   4960:             case OP_ANY:     /* This is the non-NL case */
                   4961:             case OP_ALLANY:
                   4962:             case OP_ANYBYTE:
                   4963:             break;
                   4964: 
                   4965:             case OP_ANYNL:
                   4966:             switch(c)
                   4967:               {
                   4968:               default: RRETURN(MATCH_NOMATCH);
                   4969:               case 0x000d:
                   4970:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4971:               break;
                   4972: 
                   4973:               case 0x000a:
                   4974:               break;
                   4975: 
                   4976:               case 0x000b:
                   4977:               case 0x000c:
                   4978:               case 0x0085:
                   4979:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4980:               break;
                   4981:               }
                   4982:             break;
                   4983: 
                   4984:             case OP_NOT_HSPACE:
                   4985:             switch(c)
                   4986:               {
                   4987:               default: break;
                   4988:               case 0x09:      /* HT */
                   4989:               case 0x20:      /* SPACE */
                   4990:               case 0xa0:      /* NBSP */
                   4991:               RRETURN(MATCH_NOMATCH);
                   4992:               }
                   4993:             break;
                   4994: 
                   4995:             case OP_HSPACE:
                   4996:             switch(c)
                   4997:               {
                   4998:               default: RRETURN(MATCH_NOMATCH);
                   4999:               case 0x09:      /* HT */
                   5000:               case 0x20:      /* SPACE */
                   5001:               case 0xa0:      /* NBSP */
                   5002:               break;
                   5003:               }
                   5004:             break;
                   5005: 
                   5006:             case OP_NOT_VSPACE:
                   5007:             switch(c)
                   5008:               {
                   5009:               default: break;
                   5010:               case 0x0a:      /* LF */
                   5011:               case 0x0b:      /* VT */
                   5012:               case 0x0c:      /* FF */
                   5013:               case 0x0d:      /* CR */
                   5014:               case 0x85:      /* NEL */
                   5015:               RRETURN(MATCH_NOMATCH);
                   5016:               }
                   5017:             break;
                   5018: 
                   5019:             case OP_VSPACE:
                   5020:             switch(c)
                   5021:               {
                   5022:               default: RRETURN(MATCH_NOMATCH);
                   5023:               case 0x0a:      /* LF */
                   5024:               case 0x0b:      /* VT */
                   5025:               case 0x0c:      /* FF */
                   5026:               case 0x0d:      /* CR */
                   5027:               case 0x85:      /* NEL */
                   5028:               break;
                   5029:               }
                   5030:             break;
                   5031: 
                   5032:             case OP_NOT_DIGIT:
                   5033:             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
                   5034:             break;
                   5035: 
                   5036:             case OP_DIGIT:
                   5037:             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
                   5038:             break;
                   5039: 
                   5040:             case OP_NOT_WHITESPACE:
                   5041:             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
                   5042:             break;
                   5043: 
                   5044:             case OP_WHITESPACE:
                   5045:             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
                   5046:             break;
                   5047: 
                   5048:             case OP_NOT_WORDCHAR:
                   5049:             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
                   5050:             break;
                   5051: 
                   5052:             case OP_WORDCHAR:
                   5053:             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
                   5054:             break;
                   5055: 
                   5056:             default:
                   5057:             RRETURN(PCRE_ERROR_INTERNAL);
                   5058:             }
                   5059:           }
                   5060:         }
                   5061:       /* Control never gets here */
                   5062:       }
                   5063: 
                   5064:     /* If maximizing, it is worth using inline code for speed, doing the type
                   5065:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   5066:     UTF-8 and UCP stuff separate. */
                   5067: 
                   5068:     else
                   5069:       {
                   5070:       pp = eptr;  /* Remember where we started */
                   5071: 
                   5072: #ifdef SUPPORT_UCP
                   5073:       if (prop_type >= 0)
                   5074:         {
                   5075:         switch(prop_type)
                   5076:           {
                   5077:           case PT_ANY:
                   5078:           for (i = min; i < max; i++)
                   5079:             {
                   5080:             int len = 1;
                   5081:             if (eptr >= md->end_subject)
                   5082:               {
                   5083:               SCHECK_PARTIAL();
                   5084:               break;
                   5085:               }
                   5086:             GETCHARLENTEST(c, eptr, len);
                   5087:             if (prop_fail_result) break;
                   5088:             eptr+= len;
                   5089:             }
                   5090:           break;
                   5091: 
                   5092:           case PT_LAMP:
                   5093:           for (i = min; i < max; i++)
                   5094:             {
                   5095:             int chartype;
                   5096:             int len = 1;
                   5097:             if (eptr >= md->end_subject)
                   5098:               {
                   5099:               SCHECK_PARTIAL();
                   5100:               break;
                   5101:               }
                   5102:             GETCHARLENTEST(c, eptr, len);
                   5103:             chartype = UCD_CHARTYPE(c);
                   5104:             if ((chartype == ucp_Lu ||
                   5105:                  chartype == ucp_Ll ||
                   5106:                  chartype == ucp_Lt) == prop_fail_result)
                   5107:               break;
                   5108:             eptr+= len;
                   5109:             }
                   5110:           break;
                   5111: 
                   5112:           case PT_GC:
                   5113:           for (i = min; i < max; i++)
                   5114:             {
                   5115:             int len = 1;
                   5116:             if (eptr >= md->end_subject)
                   5117:               {
                   5118:               SCHECK_PARTIAL();
                   5119:               break;
                   5120:               }
                   5121:             GETCHARLENTEST(c, eptr, len);
                   5122:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
                   5123:             eptr+= len;
                   5124:             }
                   5125:           break;
                   5126: 
                   5127:           case PT_PC:
                   5128:           for (i = min; i < max; i++)
                   5129:             {
                   5130:             int len = 1;
                   5131:             if (eptr >= md->end_subject)
                   5132:               {
                   5133:               SCHECK_PARTIAL();
                   5134:               break;
                   5135:               }
                   5136:             GETCHARLENTEST(c, eptr, len);
                   5137:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
                   5138:             eptr+= len;
                   5139:             }
                   5140:           break;
                   5141: 
                   5142:           case PT_SC:
                   5143:           for (i = min; i < max; i++)
                   5144:             {
                   5145:             int len = 1;
                   5146:             if (eptr >= md->end_subject)
                   5147:               {
                   5148:               SCHECK_PARTIAL();
                   5149:               break;
                   5150:               }
                   5151:             GETCHARLENTEST(c, eptr, len);
                   5152:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
                   5153:             eptr+= len;
                   5154:             }
                   5155:           break;
                   5156: 
                   5157:           case PT_ALNUM:
                   5158:           for (i = min; i < max; i++)
                   5159:             {
                   5160:             int category;
                   5161:             int len = 1;
                   5162:             if (eptr >= md->end_subject)
                   5163:               {
                   5164:               SCHECK_PARTIAL();
                   5165:               break;
                   5166:               }
                   5167:             GETCHARLENTEST(c, eptr, len);
                   5168:             category = UCD_CATEGORY(c);
                   5169:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   5170:               break;
                   5171:             eptr+= len;
                   5172:             }
                   5173:           break;
                   5174: 
                   5175:           case PT_SPACE:    /* Perl space */
                   5176:           for (i = min; i < max; i++)
                   5177:             {
                   5178:             int len = 1;
                   5179:             if (eptr >= md->end_subject)
                   5180:               {
                   5181:               SCHECK_PARTIAL();
                   5182:               break;
                   5183:               }
                   5184:             GETCHARLENTEST(c, eptr, len);
                   5185:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5186:                  c == CHAR_FF || c == CHAR_CR)
                   5187:                  == prop_fail_result)
                   5188:               break;
                   5189:             eptr+= len;
                   5190:             }
                   5191:           break;
                   5192: 
                   5193:           case PT_PXSPACE:  /* POSIX space */
                   5194:           for (i = min; i < max; i++)
                   5195:             {
                   5196:             int len = 1;
                   5197:             if (eptr >= md->end_subject)
                   5198:               {
                   5199:               SCHECK_PARTIAL();
                   5200:               break;
                   5201:               }
                   5202:             GETCHARLENTEST(c, eptr, len);
                   5203:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5204:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   5205:                  == prop_fail_result)
                   5206:               break;
                   5207:             eptr+= len;
                   5208:             }
                   5209:           break;
                   5210: 
                   5211:           case PT_WORD:
                   5212:           for (i = min; i < max; i++)
                   5213:             {
                   5214:             int category;
                   5215:             int len = 1;
                   5216:             if (eptr >= md->end_subject)
                   5217:               {
                   5218:               SCHECK_PARTIAL();
                   5219:               break;
                   5220:               }
                   5221:             GETCHARLENTEST(c, eptr, len);
                   5222:             category = UCD_CATEGORY(c);
                   5223:             if ((category == ucp_L || category == ucp_N ||
                   5224:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   5225:               break;
                   5226:             eptr+= len;
                   5227:             }
                   5228:           break;
                   5229: 
                   5230:           default:
                   5231:           RRETURN(PCRE_ERROR_INTERNAL);
                   5232:           }
                   5233: 
                   5234:         /* eptr is now past the end of the maximum run */
                   5235: 
                   5236:         if (possessive) continue;
                   5237:         for(;;)
                   5238:           {
                   5239:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
                   5240:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5241:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5242:           if (utf8) BACKCHAR(eptr);
                   5243:           }
                   5244:         }
                   5245: 
                   5246:       /* Match extended Unicode sequences. We will get here only if the
                   5247:       support is in the binary; otherwise a compile-time error occurs. */
                   5248: 
                   5249:       else if (ctype == OP_EXTUNI)
                   5250:         {
                   5251:         for (i = min; i < max; i++)
                   5252:           {
                   5253:           int len = 1;
                   5254:           if (eptr >= md->end_subject)
                   5255:             {
                   5256:             SCHECK_PARTIAL();
                   5257:             break;
                   5258:             }
                   5259:           if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   5260:           if (UCD_CATEGORY(c) == ucp_M) break;
                   5261:           eptr += len;
                   5262:           while (eptr < md->end_subject)
                   5263:             {
                   5264:             len = 1;
                   5265:             if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   5266:             if (UCD_CATEGORY(c) != ucp_M) break;
                   5267:             eptr += len;
                   5268:             }
                   5269:           }
                   5270: 
                   5271:         /* eptr is now past the end of the maximum run */
                   5272: 
                   5273:         if (possessive) continue;
                   5274: 
                   5275:         for(;;)
                   5276:           {
                   5277:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
                   5278:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5279:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5280:           for (;;)                        /* Move back over one extended */
                   5281:             {
                   5282:             if (!utf8) c = *eptr; else
                   5283:               {
                   5284:               BACKCHAR(eptr);
                   5285:               GETCHAR(c, eptr);
                   5286:               }
                   5287:             if (UCD_CATEGORY(c) != ucp_M) break;
                   5288:             eptr--;
                   5289:             }
                   5290:           }
                   5291:         }
                   5292: 
                   5293:       else
                   5294: #endif   /* SUPPORT_UCP */
                   5295: 
                   5296: #ifdef SUPPORT_UTF8
                   5297:       /* UTF-8 mode */
                   5298: 
                   5299:       if (utf8)
                   5300:         {
                   5301:         switch(ctype)
                   5302:           {
                   5303:           case OP_ANY:
                   5304:           if (max < INT_MAX)
                   5305:             {
                   5306:             for (i = min; i < max; i++)
                   5307:               {
                   5308:               if (eptr >= md->end_subject)
                   5309:                 {
                   5310:                 SCHECK_PARTIAL();
                   5311:                 break;
                   5312:                 }
                   5313:               if (IS_NEWLINE(eptr)) break;
                   5314:               eptr++;
                   5315:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5316:               }
                   5317:             }
                   5318: 
                   5319:           /* Handle unlimited UTF-8 repeat */
                   5320: 
                   5321:           else
                   5322:             {
                   5323:             for (i = min; i < max; i++)
                   5324:               {
                   5325:               if (eptr >= md->end_subject)
                   5326:                 {
                   5327:                 SCHECK_PARTIAL();
                   5328:                 break;
                   5329:                 }
                   5330:               if (IS_NEWLINE(eptr)) break;
                   5331:               eptr++;
                   5332:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5333:               }
                   5334:             }
                   5335:           break;
                   5336: 
                   5337:           case OP_ALLANY:
                   5338:           if (max < INT_MAX)
                   5339:             {
                   5340:             for (i = min; i < max; i++)
                   5341:               {
                   5342:               if (eptr >= md->end_subject)
                   5343:                 {
                   5344:                 SCHECK_PARTIAL();
                   5345:                 break;
                   5346:                 }
                   5347:               eptr++;
                   5348:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5349:               }
                   5350:             }
                   5351:           else
                   5352:             {
                   5353:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5354:             SCHECK_PARTIAL();
                   5355:             }
                   5356:           break;
                   5357: 
                   5358:           /* The byte case is the same as non-UTF8 */
                   5359: 
                   5360:           case OP_ANYBYTE:
                   5361:           c = max - min;
                   5362:           if (c > (unsigned int)(md->end_subject - eptr))
                   5363:             {
                   5364:             eptr = md->end_subject;
                   5365:             SCHECK_PARTIAL();
                   5366:             }
                   5367:           else eptr += c;
                   5368:           break;
                   5369: 
                   5370:           case OP_ANYNL:
                   5371:           for (i = min; i < max; i++)
                   5372:             {
                   5373:             int len = 1;
                   5374:             if (eptr >= md->end_subject)
                   5375:               {
                   5376:               SCHECK_PARTIAL();
                   5377:               break;
                   5378:               }
                   5379:             GETCHARLEN(c, eptr, len);
                   5380:             if (c == 0x000d)
                   5381:               {
                   5382:               if (++eptr >= md->end_subject) break;
                   5383:               if (*eptr == 0x000a) eptr++;
                   5384:               }
                   5385:             else
                   5386:               {
                   5387:               if (c != 0x000a &&
                   5388:                   (md->bsr_anycrlf ||
                   5389:                    (c != 0x000b && c != 0x000c &&
                   5390:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   5391:                 break;
                   5392:               eptr += len;
                   5393:               }
                   5394:             }
                   5395:           break;
                   5396: 
                   5397:           case OP_NOT_HSPACE:
                   5398:           case OP_HSPACE:
                   5399:           for (i = min; i < max; i++)
                   5400:             {
                   5401:             BOOL gotspace;
                   5402:             int len = 1;
                   5403:             if (eptr >= md->end_subject)
                   5404:               {
                   5405:               SCHECK_PARTIAL();
                   5406:               break;
                   5407:               }
                   5408:             GETCHARLEN(c, eptr, len);
                   5409:             switch(c)
                   5410:               {
                   5411:               default: gotspace = FALSE; break;
                   5412:               case 0x09:      /* HT */
                   5413:               case 0x20:      /* SPACE */
                   5414:               case 0xa0:      /* NBSP */
                   5415:               case 0x1680:    /* OGHAM SPACE MARK */
                   5416:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5417:               case 0x2000:    /* EN QUAD */
                   5418:               case 0x2001:    /* EM QUAD */
                   5419:               case 0x2002:    /* EN SPACE */
                   5420:               case 0x2003:    /* EM SPACE */
                   5421:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5422:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5423:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5424:               case 0x2007:    /* FIGURE SPACE */
                   5425:               case 0x2008:    /* PUNCTUATION SPACE */
                   5426:               case 0x2009:    /* THIN SPACE */
                   5427:               case 0x200A:    /* HAIR SPACE */
                   5428:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5429:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5430:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5431:               gotspace = TRUE;
                   5432:               break;
                   5433:               }
                   5434:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5435:             eptr += len;
                   5436:             }
                   5437:           break;
                   5438: 
                   5439:           case OP_NOT_VSPACE:
                   5440:           case OP_VSPACE:
                   5441:           for (i = min; i < max; i++)
                   5442:             {
                   5443:             BOOL gotspace;
                   5444:             int len = 1;
                   5445:             if (eptr >= md->end_subject)
                   5446:               {
                   5447:               SCHECK_PARTIAL();
                   5448:               break;
                   5449:               }
                   5450:             GETCHARLEN(c, eptr, len);
                   5451:             switch(c)
                   5452:               {
                   5453:               default: gotspace = FALSE; break;
                   5454:               case 0x0a:      /* LF */
                   5455:               case 0x0b:      /* VT */
                   5456:               case 0x0c:      /* FF */
                   5457:               case 0x0d:      /* CR */
                   5458:               case 0x85:      /* NEL */
                   5459:               case 0x2028:    /* LINE SEPARATOR */
                   5460:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5461:               gotspace = TRUE;
                   5462:               break;
                   5463:               }
                   5464:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5465:             eptr += len;
                   5466:             }
                   5467:           break;
                   5468: 
                   5469:           case OP_NOT_DIGIT:
                   5470:           for (i = min; i < max; i++)
                   5471:             {
                   5472:             int len = 1;
                   5473:             if (eptr >= md->end_subject)
                   5474:               {
                   5475:               SCHECK_PARTIAL();
                   5476:               break;
                   5477:               }
                   5478:             GETCHARLEN(c, eptr, len);
                   5479:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5480:             eptr+= len;
                   5481:             }
                   5482:           break;
                   5483: 
                   5484:           case OP_DIGIT:
                   5485:           for (i = min; i < max; i++)
                   5486:             {
                   5487:             int len = 1;
                   5488:             if (eptr >= md->end_subject)
                   5489:               {
                   5490:               SCHECK_PARTIAL();
                   5491:               break;
                   5492:               }
                   5493:             GETCHARLEN(c, eptr, len);
                   5494:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5495:             eptr+= len;
                   5496:             }
                   5497:           break;
                   5498: 
                   5499:           case OP_NOT_WHITESPACE:
                   5500:           for (i = min; i < max; i++)
                   5501:             {
                   5502:             int len = 1;
                   5503:             if (eptr >= md->end_subject)
                   5504:               {
                   5505:               SCHECK_PARTIAL();
                   5506:               break;
                   5507:               }
                   5508:             GETCHARLEN(c, eptr, len);
                   5509:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5510:             eptr+= len;
                   5511:             }
                   5512:           break;
                   5513: 
                   5514:           case OP_WHITESPACE:
                   5515:           for (i = min; i < max; i++)
                   5516:             {
                   5517:             int len = 1;
                   5518:             if (eptr >= md->end_subject)
                   5519:               {
                   5520:               SCHECK_PARTIAL();
                   5521:               break;
                   5522:               }
                   5523:             GETCHARLEN(c, eptr, len);
                   5524:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5525:             eptr+= len;
                   5526:             }
                   5527:           break;
                   5528: 
                   5529:           case OP_NOT_WORDCHAR:
                   5530:           for (i = min; i < max; i++)
                   5531:             {
                   5532:             int len = 1;
                   5533:             if (eptr >= md->end_subject)
                   5534:               {
                   5535:               SCHECK_PARTIAL();
                   5536:               break;
                   5537:               }
                   5538:             GETCHARLEN(c, eptr, len);
                   5539:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5540:             eptr+= len;
                   5541:             }
                   5542:           break;
                   5543: 
                   5544:           case OP_WORDCHAR:
                   5545:           for (i = min; i < max; i++)
                   5546:             {
                   5547:             int len = 1;
                   5548:             if (eptr >= md->end_subject)
                   5549:               {
                   5550:               SCHECK_PARTIAL();
                   5551:               break;
                   5552:               }
                   5553:             GETCHARLEN(c, eptr, len);
                   5554:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5555:             eptr+= len;
                   5556:             }
                   5557:           break;
                   5558: 
                   5559:           default:
                   5560:           RRETURN(PCRE_ERROR_INTERNAL);
                   5561:           }
                   5562: 
                   5563:         /* eptr is now past the end of the maximum run. If possessive, we are
                   5564:         done (no backing up). Otherwise, match at this position; anything other
                   5565:         than no match is immediately returned. For nomatch, back up one
                   5566:         character, unless we are matching \R and the last thing matched was
                   5567:         \r\n, in which case, back up two bytes. */
                   5568: 
                   5569:         if (possessive) continue;
                   5570:         for(;;)
                   5571:           {
                   5572:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
                   5573:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5574:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5575:           BACKCHAR(eptr);
                   5576:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
                   5577:               eptr[-1] == '\r') eptr--;
                   5578:           }
                   5579:         }
                   5580:       else
                   5581: #endif  /* SUPPORT_UTF8 */
                   5582: 
                   5583:       /* Not UTF-8 mode */
                   5584:         {
                   5585:         switch(ctype)
                   5586:           {
                   5587:           case OP_ANY:
                   5588:           for (i = min; i < max; i++)
                   5589:             {
                   5590:             if (eptr >= md->end_subject)
                   5591:               {
                   5592:               SCHECK_PARTIAL();
                   5593:               break;
                   5594:               }
                   5595:             if (IS_NEWLINE(eptr)) break;
                   5596:             eptr++;
                   5597:             }
                   5598:           break;
                   5599: 
                   5600:           case OP_ALLANY:
                   5601:           case OP_ANYBYTE:
                   5602:           c = max - min;
                   5603:           if (c > (unsigned int)(md->end_subject - eptr))
                   5604:             {
                   5605:             eptr = md->end_subject;
                   5606:             SCHECK_PARTIAL();
                   5607:             }
                   5608:           else eptr += c;
                   5609:           break;
                   5610: 
                   5611:           case OP_ANYNL:
                   5612:           for (i = min; i < max; i++)
                   5613:             {
                   5614:             if (eptr >= md->end_subject)
                   5615:               {
                   5616:               SCHECK_PARTIAL();
                   5617:               break;
                   5618:               }
                   5619:             c = *eptr;
                   5620:             if (c == 0x000d)
                   5621:               {
                   5622:               if (++eptr >= md->end_subject) break;
                   5623:               if (*eptr == 0x000a) eptr++;
                   5624:               }
                   5625:             else
                   5626:               {
                   5627:               if (c != 0x000a &&
                   5628:                   (md->bsr_anycrlf ||
                   5629:                     (c != 0x000b && c != 0x000c && c != 0x0085)))
                   5630:                 break;
                   5631:               eptr++;
                   5632:               }
                   5633:             }
                   5634:           break;
                   5635: 
                   5636:           case OP_NOT_HSPACE:
                   5637:           for (i = min; i < max; i++)
                   5638:             {
                   5639:             if (eptr >= md->end_subject)
                   5640:               {
                   5641:               SCHECK_PARTIAL();
                   5642:               break;
                   5643:               }
                   5644:             c = *eptr;
                   5645:             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
                   5646:             eptr++;
                   5647:             }
                   5648:           break;
                   5649: 
                   5650:           case OP_HSPACE:
                   5651:           for (i = min; i < max; i++)
                   5652:             {
                   5653:             if (eptr >= md->end_subject)
                   5654:               {
                   5655:               SCHECK_PARTIAL();
                   5656:               break;
                   5657:               }
                   5658:             c = *eptr;
                   5659:             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
                   5660:             eptr++;
                   5661:             }
                   5662:           break;
                   5663: 
                   5664:           case OP_NOT_VSPACE:
                   5665:           for (i = min; i < max; i++)
                   5666:             {
                   5667:             if (eptr >= md->end_subject)
                   5668:               {
                   5669:               SCHECK_PARTIAL();
                   5670:               break;
                   5671:               }
                   5672:             c = *eptr;
                   5673:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
                   5674:               break;
                   5675:             eptr++;
                   5676:             }
                   5677:           break;
                   5678: 
                   5679:           case OP_VSPACE:
                   5680:           for (i = min; i < max; i++)
                   5681:             {
                   5682:             if (eptr >= md->end_subject)
                   5683:               {
                   5684:               SCHECK_PARTIAL();
                   5685:               break;
                   5686:               }
                   5687:             c = *eptr;
                   5688:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
                   5689:               break;
                   5690:             eptr++;
                   5691:             }
                   5692:           break;
                   5693: 
                   5694:           case OP_NOT_DIGIT:
                   5695:           for (i = min; i < max; i++)
                   5696:             {
                   5697:             if (eptr >= md->end_subject)
                   5698:               {
                   5699:               SCHECK_PARTIAL();
                   5700:               break;
                   5701:               }
                   5702:             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
                   5703:             eptr++;
                   5704:             }
                   5705:           break;
                   5706: 
                   5707:           case OP_DIGIT:
                   5708:           for (i = min; i < max; i++)
                   5709:             {
                   5710:             if (eptr >= md->end_subject)
                   5711:               {
                   5712:               SCHECK_PARTIAL();
                   5713:               break;
                   5714:               }
                   5715:             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
                   5716:             eptr++;
                   5717:             }
                   5718:           break;
                   5719: 
                   5720:           case OP_NOT_WHITESPACE:
                   5721:           for (i = min; i < max; i++)
                   5722:             {
                   5723:             if (eptr >= md->end_subject)
                   5724:               {
                   5725:               SCHECK_PARTIAL();
                   5726:               break;
                   5727:               }
                   5728:             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
                   5729:             eptr++;
                   5730:             }
                   5731:           break;
                   5732: 
                   5733:           case OP_WHITESPACE:
                   5734:           for (i = min; i < max; i++)
                   5735:             {
                   5736:             if (eptr >= md->end_subject)
                   5737:               {
                   5738:               SCHECK_PARTIAL();
                   5739:               break;
                   5740:               }
                   5741:             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
                   5742:             eptr++;
                   5743:             }
                   5744:           break;
                   5745: 
                   5746:           case OP_NOT_WORDCHAR:
                   5747:           for (i = min; i < max; i++)
                   5748:             {
                   5749:             if (eptr >= md->end_subject)
                   5750:               {
                   5751:               SCHECK_PARTIAL();
                   5752:               break;
                   5753:               }
                   5754:             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
                   5755:             eptr++;
                   5756:             }
                   5757:           break;
                   5758: 
                   5759:           case OP_WORDCHAR:
                   5760:           for (i = min; i < max; i++)
                   5761:             {
                   5762:             if (eptr >= md->end_subject)
                   5763:               {
                   5764:               SCHECK_PARTIAL();
                   5765:               break;
                   5766:               }
                   5767:             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
                   5768:             eptr++;
                   5769:             }
                   5770:           break;
                   5771: 
                   5772:           default:
                   5773:           RRETURN(PCRE_ERROR_INTERNAL);
                   5774:           }
                   5775: 
                   5776:         /* eptr is now past the end of the maximum run. If possessive, we are
                   5777:         done (no backing up). Otherwise, match at this position; anything other
                   5778:         than no match is immediately returned. For nomatch, back up one
                   5779:         character (byte), unless we are matching \R and the last thing matched
                   5780:         was \r\n, in which case, back up two bytes. */
                   5781: 
                   5782:         if (possessive) continue;
                   5783:         while (eptr >= pp)
                   5784:           {
                   5785:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
                   5786:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5787:           eptr--;
                   5788:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
                   5789:               eptr[-1] == '\r') eptr--;
                   5790:           }
                   5791:         }
                   5792: 
                   5793:       /* Get here if we can't make it match with any permitted repetitions */
                   5794: 
                   5795:       RRETURN(MATCH_NOMATCH);
                   5796:       }
                   5797:     /* Control never gets here */
                   5798: 
                   5799:     /* There's been some horrible disaster. Arrival here can only mean there is
                   5800:     something seriously wrong in the code above or the OP_xxx definitions. */
                   5801: 
                   5802:     default:
                   5803:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   5804:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   5805:     }
                   5806: 
                   5807:   /* Do not stick any code in here without much thought; it is assumed
                   5808:   that "continue" in the code above comes out to here to repeat the main
                   5809:   loop. */
                   5810: 
                   5811:   }             /* End of main loop */
                   5812: /* Control never reaches here */
                   5813: 
                   5814: 
                   5815: /* When compiling to use the heap rather than the stack for recursive calls to
                   5816: match(), the RRETURN() macro jumps here. The number that is saved in
                   5817: frame->Xwhere indicates which label we actually want to return to. */
                   5818: 
                   5819: #ifdef NO_RECURSE
                   5820: #define LBL(val) case val: goto L_RM##val;
                   5821: HEAP_RETURN:
                   5822: switch (frame->Xwhere)
                   5823:   {
                   5824:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   5825:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   5826:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   5827:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
                   5828:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
                   5829:   LBL(65) LBL(66)
                   5830: #ifdef SUPPORT_UTF8
                   5831:   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
                   5832:   LBL(32) LBL(34) LBL(42) LBL(46)
                   5833: #ifdef SUPPORT_UCP
                   5834:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
                   5835:   LBL(59) LBL(60) LBL(61) LBL(62)
                   5836: #endif  /* SUPPORT_UCP */
                   5837: #endif  /* SUPPORT_UTF8 */
                   5838:   default:
                   5839:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   5840:   return PCRE_ERROR_INTERNAL;
                   5841:   }
                   5842: #undef LBL
                   5843: #endif  /* NO_RECURSE */
                   5844: }
                   5845: 
                   5846: 
                   5847: /***************************************************************************
                   5848: ****************************************************************************
                   5849:                    RECURSION IN THE match() FUNCTION
                   5850: 
                   5851: Undefine all the macros that were defined above to handle this. */
                   5852: 
                   5853: #ifdef NO_RECURSE
                   5854: #undef eptr
                   5855: #undef ecode
                   5856: #undef mstart
                   5857: #undef offset_top
                   5858: #undef eptrb
                   5859: #undef flags
                   5860: 
                   5861: #undef callpat
                   5862: #undef charptr
                   5863: #undef data
                   5864: #undef next
                   5865: #undef pp
                   5866: #undef prev
                   5867: #undef saved_eptr
                   5868: 
                   5869: #undef new_recursive
                   5870: 
                   5871: #undef cur_is_word
                   5872: #undef condition
                   5873: #undef prev_is_word
                   5874: 
                   5875: #undef ctype
                   5876: #undef length
                   5877: #undef max
                   5878: #undef min
                   5879: #undef number
                   5880: #undef offset
                   5881: #undef op
                   5882: #undef save_capture_last
                   5883: #undef save_offset1
                   5884: #undef save_offset2
                   5885: #undef save_offset3
                   5886: #undef stacksave
                   5887: 
                   5888: #undef newptrb
                   5889: 
                   5890: #endif
                   5891: 
                   5892: /* These two are defined as macros in both cases */
                   5893: 
                   5894: #undef fc
                   5895: #undef fi
                   5896: 
                   5897: /***************************************************************************
                   5898: ***************************************************************************/
                   5899: 
                   5900: 
                   5901: 
                   5902: /*************************************************
                   5903: *         Execute a Regular Expression           *
                   5904: *************************************************/
                   5905: 
                   5906: /* This function applies a compiled re to a subject string and picks out
                   5907: portions of the string if it matches. Two elements in the vector are set for
                   5908: each substring: the offsets to the start and end of the substring.
                   5909: 
                   5910: Arguments:
                   5911:   argument_re     points to the compiled expression
                   5912:   extra_data      points to extra data or is NULL
                   5913:   subject         points to the subject string
                   5914:   length          length of subject string (may contain binary zeros)
                   5915:   start_offset    where to start in the subject string
                   5916:   options         option bits
                   5917:   offsets         points to a vector of ints to be filled in with offsets
                   5918:   offsetcount     the number of elements in the vector
                   5919: 
                   5920: Returns:          > 0 => success; value is the number of elements filled in
                   5921:                   = 0 => success, but offsets is not big enough
                   5922:                    -1 => failed to match
                   5923:                  < -1 => some kind of unexpected problem
                   5924: */
                   5925: 
                   5926: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   5927: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   5928:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   5929:   int offsetcount)
                   5930: {
                   5931: int rc, ocount, arg_offset_max;
                   5932: int first_byte = -1;
                   5933: int req_byte = -1;
                   5934: int req_byte2 = -1;
                   5935: int newline;
                   5936: BOOL using_temporary_offsets = FALSE;
                   5937: BOOL anchored;
                   5938: BOOL startline;
                   5939: BOOL firstline;
                   5940: BOOL first_byte_caseless = FALSE;
                   5941: BOOL req_byte_caseless = FALSE;
                   5942: BOOL utf8;
                   5943: match_data match_block;
                   5944: match_data *md = &match_block;
                   5945: const uschar *tables;
                   5946: const uschar *start_bits = NULL;
                   5947: USPTR start_match = (USPTR)subject + start_offset;
                   5948: USPTR end_subject;
                   5949: USPTR start_partial = NULL;
                   5950: USPTR req_byte_ptr = start_match - 1;
                   5951: 
                   5952: pcre_study_data internal_study;
                   5953: const pcre_study_data *study;
                   5954: 
                   5955: real_pcre internal_re;
                   5956: const real_pcre *external_re = (const real_pcre *)argument_re;
                   5957: const real_pcre *re = external_re;
                   5958: 
                   5959: /* Plausibility checks */
                   5960: 
                   5961: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   5962: if (re == NULL || subject == NULL ||
                   5963:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   5964: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   5965: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   5966: 
                   5967: /* These two settings are used in the code for checking a UTF-8 string that
                   5968: follows immediately afterwards. Other values in the md block are used only
                   5969: during "normal" pcre_exec() processing, not when the JIT support is in use,
                   5970: so they are set up later. */
                   5971: 
                   5972: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
                   5973: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
                   5974:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
                   5975: 
                   5976: /* Check a UTF-8 string if required. Pass back the character offset and error
                   5977: code for an invalid string if a results vector is available. */
                   5978: 
                   5979: #ifdef SUPPORT_UTF8
                   5980: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   5981:   {
                   5982:   int erroroffset;
                   5983:   int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
                   5984:   if (errorcode != 0)
                   5985:     {
                   5986:     if (offsetcount >= 2)
                   5987:       {
                   5988:       offsets[0] = erroroffset;
                   5989:       offsets[1] = errorcode;
                   5990:       }
                   5991:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
                   5992:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
                   5993:     }
                   5994: 
                   5995:   /* Check that a start_offset points to the start of a UTF-8 character. */
                   5996:   if (start_offset > 0 && start_offset < length &&
                   5997:       (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
                   5998:     return PCRE_ERROR_BADUTF8_OFFSET;
                   5999:   }
                   6000: #endif
                   6001: 
                   6002: /* If the pattern was successfully studied with JIT support, run the JIT
                   6003: executable instead of the rest of this function. Most options must be set at
                   6004: compile time for the JIT code to be usable. Fall back to the normal code path if
                   6005: an unsupported flag is set. In particular, JIT does not support partial
                   6006: matching. */
                   6007: 
                   6008: #ifdef SUPPORT_JIT
                   6009: if (extra_data != NULL
                   6010:     && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
                   6011:     && extra_data->executable_jit != NULL
                   6012:     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
                   6013:     && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
                   6014:                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
                   6015:   return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
                   6016:     start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
                   6017:     ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
                   6018: #endif
                   6019: 
                   6020: /* Carry on with non-JIT matching. This information is for finding all the
                   6021: numbers associated with a given name, for condition testing. */
                   6022: 
                   6023: md->name_table = (uschar *)re + re->name_table_offset;
                   6024: md->name_count = re->name_count;
                   6025: md->name_entry_size = re->name_entry_size;
                   6026: 
                   6027: /* Fish out the optional data from the extra_data structure, first setting
                   6028: the default values. */
                   6029: 
                   6030: study = NULL;
                   6031: md->match_limit = MATCH_LIMIT;
                   6032: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   6033: md->callout_data = NULL;
                   6034: 
                   6035: /* The table pointer is always in native byte order. */
                   6036: 
                   6037: tables = external_re->tables;
                   6038: 
                   6039: if (extra_data != NULL)
                   6040:   {
                   6041:   register unsigned int flags = extra_data->flags;
                   6042:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   6043:     study = (const pcre_study_data *)extra_data->study_data;
                   6044:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   6045:     md->match_limit = extra_data->match_limit;
                   6046:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   6047:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   6048:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   6049:     md->callout_data = extra_data->callout_data;
                   6050:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   6051:   }
                   6052: 
                   6053: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   6054: is a feature that makes it possible to save compiled regexes and re-use them
                   6055: in other programs later. */
                   6056: 
                   6057: if (tables == NULL) tables = _pcre_default_tables;
                   6058: 
                   6059: /* Check that the first field in the block is the magic number. If it is not,
                   6060: test for a regex that was compiled on a host of opposite endianness. If this is
                   6061: the case, flipped values are put in internal_re and internal_study if there was
                   6062: study data too. */
                   6063: 
                   6064: if (re->magic_number != MAGIC_NUMBER)
                   6065:   {
                   6066:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   6067:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   6068:   if (study != NULL) study = &internal_study;
                   6069:   }
                   6070: 
                   6071: /* Set up other data */
                   6072: 
                   6073: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   6074: startline = (re->flags & PCRE_STARTLINE) != 0;
                   6075: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   6076: 
                   6077: /* The code starts after the real_pcre block and the capture name table. */
                   6078: 
                   6079: md->start_code = (const uschar *)external_re + re->name_table_offset +
                   6080:   re->name_count * re->name_entry_size;
                   6081: 
                   6082: md->start_subject = (USPTR)subject;
                   6083: md->start_offset = start_offset;
                   6084: md->end_subject = md->start_subject + length;
                   6085: end_subject = md->end_subject;
                   6086: 
                   6087: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   6088: md->use_ucp = (re->options & PCRE_UCP) != 0;
                   6089: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   6090: md->ignore_skip_arg = FALSE;
                   6091: 
                   6092: /* Some options are unpacked into BOOL variables in the hope that testing
                   6093: them will be faster than individual option bits. */
                   6094: 
                   6095: md->notbol = (options & PCRE_NOTBOL) != 0;
                   6096: md->noteol = (options & PCRE_NOTEOL) != 0;
                   6097: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   6098: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
                   6099: 
                   6100: md->hitend = FALSE;
                   6101: md->mark = md->nomatch_mark = NULL;     /* In case never set */
                   6102: 
                   6103: md->recursive = NULL;                   /* No recursion at top level */
                   6104: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
                   6105: 
                   6106: md->lcc = tables + lcc_offset;
                   6107: md->ctypes = tables + ctypes_offset;
                   6108: 
                   6109: /* Handle different \R options. */
                   6110: 
                   6111: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   6112:   {
                   6113:   case 0:
                   6114:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   6115:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   6116:   else
                   6117: #ifdef BSR_ANYCRLF
                   6118:   md->bsr_anycrlf = TRUE;
                   6119: #else
                   6120:   md->bsr_anycrlf = FALSE;
                   6121: #endif
                   6122:   break;
                   6123: 
                   6124:   case PCRE_BSR_ANYCRLF:
                   6125:   md->bsr_anycrlf = TRUE;
                   6126:   break;
                   6127: 
                   6128:   case PCRE_BSR_UNICODE:
                   6129:   md->bsr_anycrlf = FALSE;
                   6130:   break;
                   6131: 
                   6132:   default: return PCRE_ERROR_BADNEWLINE;
                   6133:   }
                   6134: 
                   6135: /* Handle different types of newline. The three bits give eight cases. If
                   6136: nothing is set at run time, whatever was used at compile time applies. */
                   6137: 
                   6138: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   6139:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   6140:   {
                   6141:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   6142:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   6143:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   6144:   case PCRE_NEWLINE_CR+
                   6145:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   6146:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   6147:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   6148:   default: return PCRE_ERROR_BADNEWLINE;
                   6149:   }
                   6150: 
                   6151: if (newline == -2)
                   6152:   {
                   6153:   md->nltype = NLTYPE_ANYCRLF;
                   6154:   }
                   6155: else if (newline < 0)
                   6156:   {
                   6157:   md->nltype = NLTYPE_ANY;
                   6158:   }
                   6159: else
                   6160:   {
                   6161:   md->nltype = NLTYPE_FIXED;
                   6162:   if (newline > 255)
                   6163:     {
                   6164:     md->nllen = 2;
                   6165:     md->nl[0] = (newline >> 8) & 255;
                   6166:     md->nl[1] = newline & 255;
                   6167:     }
                   6168:   else
                   6169:     {
                   6170:     md->nllen = 1;
                   6171:     md->nl[0] = newline;
                   6172:     }
                   6173:   }
                   6174: 
                   6175: /* Partial matching was originally supported only for a restricted set of
                   6176: regexes; from release 8.00 there are no restrictions, but the bits are still
                   6177: defined (though never set). So there's no harm in leaving this code. */
                   6178: 
                   6179: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   6180:   return PCRE_ERROR_BADPARTIAL;
                   6181: 
                   6182: /* If the expression has got more back references than the offsets supplied can
                   6183: hold, we get a temporary chunk of working store to use during the matching.
                   6184: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   6185: of 3. */
                   6186: 
                   6187: ocount = offsetcount - (offsetcount % 3);
                   6188: arg_offset_max = (2*ocount)/3;
                   6189: 
                   6190: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   6191:   {
                   6192:   ocount = re->top_backref * 3 + 3;
                   6193:   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
                   6194:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   6195:   using_temporary_offsets = TRUE;
                   6196:   DPRINTF(("Got memory to hold back references\n"));
                   6197:   }
                   6198: else md->offset_vector = offsets;
                   6199: 
                   6200: md->offset_end = ocount;
                   6201: md->offset_max = (2*ocount)/3;
                   6202: md->offset_overflow = FALSE;
                   6203: md->capture_last = -1;
                   6204: 
                   6205: /* Reset the working variable associated with each extraction. These should
                   6206: never be used unless previously set, but they get saved and restored, and so we
                   6207: initialize them to avoid reading uninitialized locations. Also, unset the
                   6208: offsets for the matched string. This is really just for tidiness with callouts,
                   6209: in case they inspect these fields. */
                   6210: 
                   6211: if (md->offset_vector != NULL)
                   6212:   {
                   6213:   register int *iptr = md->offset_vector + ocount;
                   6214:   register int *iend = iptr - re->top_bracket;
                   6215:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
                   6216:   while (--iptr >= iend) *iptr = -1;
                   6217:   md->offset_vector[0] = md->offset_vector[1] = -1;
                   6218:   }
                   6219: 
                   6220: /* Set up the first character to match, if available. The first_byte value is
                   6221: never set for an anchored regular expression, but the anchoring may be forced
                   6222: at run time, so we have to test for anchoring. The first char may be unset for
                   6223: an unanchored pattern, of course. If there's no first char and the pattern was
                   6224: studied, there may be a bitmap of possible first characters. */
                   6225: 
                   6226: if (!anchored)
                   6227:   {
                   6228:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   6229:     {
                   6230:     first_byte = re->first_byte & 255;
                   6231:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   6232:       first_byte = md->lcc[first_byte];
                   6233:     }
                   6234:   else
                   6235:     if (!startline && study != NULL &&
                   6236:       (study->flags & PCRE_STUDY_MAPPED) != 0)
                   6237:         start_bits = study->start_bits;
                   6238:   }
                   6239: 
                   6240: /* For anchored or unanchored matches, there may be a "last known required
                   6241: character" set. */
                   6242: 
                   6243: if ((re->flags & PCRE_REQCHSET) != 0)
                   6244:   {
                   6245:   req_byte = re->req_byte & 255;
                   6246:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   6247:   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
                   6248:   }
                   6249: 
                   6250: 
                   6251: 
                   6252: 
                   6253: /* ==========================================================================*/
                   6254: 
                   6255: /* Loop for handling unanchored repeated matching attempts; for anchored
                   6256: regexes the loop runs just once. */
                   6257: 
                   6258: for(;;)
                   6259:   {
                   6260:   USPTR save_end_subject = end_subject;
                   6261:   USPTR new_start_match;
                   6262: 
                   6263:   /* If firstline is TRUE, the start of the match is constrained to the first
                   6264:   line of a multiline string. That is, the match must be before or at the first
                   6265:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   6266:   scanning at a newline. If the match fails at the newline, later code breaks
                   6267:   this loop. */
                   6268: 
                   6269:   if (firstline)
                   6270:     {
                   6271:     USPTR t = start_match;
                   6272: #ifdef SUPPORT_UTF8
                   6273:     if (utf8)
                   6274:       {
                   6275:       while (t < md->end_subject && !IS_NEWLINE(t))
                   6276:         {
                   6277:         t++;
                   6278:         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
                   6279:         }
                   6280:       }
                   6281:     else
                   6282: #endif
                   6283:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   6284:     end_subject = t;
                   6285:     }
                   6286: 
                   6287:   /* There are some optimizations that avoid running the match if a known
                   6288:   starting point is not found, or if a known later character is not present.
                   6289:   However, there is an option that disables these, for testing and for ensuring
                   6290:   that all callouts do actually occur. The option can be set in the regex by
                   6291:   (*NO_START_OPT) or passed in match-time options. */
                   6292: 
                   6293:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   6294:     {
                   6295:     /* Advance to a unique first byte if there is one. */
                   6296: 
                   6297:     if (first_byte >= 0)
                   6298:       {
                   6299:       if (first_byte_caseless)
                   6300:         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
                   6301:           start_match++;
                   6302:       else
                   6303:         while (start_match < end_subject && *start_match != first_byte)
                   6304:           start_match++;
                   6305:       }
                   6306: 
                   6307:     /* Or to just after a linebreak for a multiline match */
                   6308: 
                   6309:     else if (startline)
                   6310:       {
                   6311:       if (start_match > md->start_subject + start_offset)
                   6312:         {
                   6313: #ifdef SUPPORT_UTF8
                   6314:         if (utf8)
                   6315:           {
                   6316:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6317:             {
                   6318:             start_match++;
                   6319:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   6320:               start_match++;
                   6321:             }
                   6322:           }
                   6323:         else
                   6324: #endif
                   6325:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6326:           start_match++;
                   6327: 
                   6328:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   6329:         and we are now at a LF, advance the match position by one more character.
                   6330:         */
                   6331: 
                   6332:         if (start_match[-1] == CHAR_CR &&
                   6333:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   6334:              start_match < end_subject &&
                   6335:              *start_match == CHAR_NL)
                   6336:           start_match++;
                   6337:         }
                   6338:       }
                   6339: 
                   6340:     /* Or to a non-unique first byte after study */
                   6341: 
                   6342:     else if (start_bits != NULL)
                   6343:       {
                   6344:       while (start_match < end_subject)
                   6345:         {
                   6346:         register unsigned int c = *start_match;
                   6347:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   6348:           {
                   6349:           start_match++;
                   6350: #ifdef SUPPORT_UTF8
                   6351:           if (utf8)
                   6352:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   6353:               start_match++;
                   6354: #endif
                   6355:           }
                   6356:         else break;
                   6357:         }
                   6358:       }
                   6359:     }   /* Starting optimizations */
                   6360: 
                   6361:   /* Restore fudged end_subject */
                   6362: 
                   6363:   end_subject = save_end_subject;
                   6364: 
                   6365:   /* The following two optimizations are disabled for partial matching or if
                   6366:   disabling is explicitly requested. */
                   6367: 
                   6368:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
                   6369:     {
                   6370:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6371:     a lower bound; it may be that no string of that length actually matches the
                   6372:     pattern. Although the value is, strictly, in characters, we treat it as
                   6373:     bytes to avoid spending too much time in this optimization. */
                   6374: 
                   6375:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6376:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6377:       {
                   6378:       rc = MATCH_NOMATCH;
                   6379:       break;
                   6380:       }
                   6381: 
                   6382:     /* If req_byte is set, we know that that character must appear in the
                   6383:     subject for the match to succeed. If the first character is set, req_byte
                   6384:     must be later in the subject; otherwise the test starts at the match point.
                   6385:     This optimization can save a huge amount of backtracking in patterns with
                   6386:     nested unlimited repeats that aren't going to match. Writing separate code
                   6387:     for cased/caseless versions makes it go faster, as does using an
                   6388:     autoincrement and backing off on a match.
                   6389: 
                   6390:     HOWEVER: when the subject string is very, very long, searching to its end
                   6391:     can take a long time, and give bad performance on quite ordinary patterns.
                   6392:     This showed up when somebody was matching something like /^\d+C/ on a
                   6393:     32-megabyte string... so we don't do this when the string is sufficiently
                   6394:     long. */
                   6395: 
                   6396:     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
                   6397:       {
                   6398:       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
                   6399: 
                   6400:       /* We don't need to repeat the search if we haven't yet reached the
                   6401:       place we found it at last time. */
                   6402: 
                   6403:       if (p > req_byte_ptr)
                   6404:         {
                   6405:         if (req_byte_caseless)
                   6406:           {
                   6407:           while (p < end_subject)
                   6408:             {
                   6409:             register int pp = *p++;
                   6410:             if (pp == req_byte || pp == req_byte2) { p--; break; }
                   6411:             }
                   6412:           }
                   6413:         else
                   6414:           {
                   6415:           while (p < end_subject)
                   6416:             {
                   6417:             if (*p++ == req_byte) { p--; break; }
                   6418:             }
                   6419:           }
                   6420: 
                   6421:         /* If we can't find the required character, break the matching loop,
                   6422:         forcing a match failure. */
                   6423: 
                   6424:         if (p >= end_subject)
                   6425:           {
                   6426:           rc = MATCH_NOMATCH;
                   6427:           break;
                   6428:           }
                   6429: 
                   6430:         /* If we have found the required character, save the point where we
                   6431:         found it, so that we don't search again next time round the loop if
                   6432:         the start hasn't passed this character yet. */
                   6433: 
                   6434:         req_byte_ptr = p;
                   6435:         }
                   6436:       }
                   6437:     }
                   6438: 
                   6439: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6440:   printf(">>>> Match against: ");
                   6441:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6442:   printf("\n");
                   6443: #endif
                   6444: 
                   6445:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6446:   first starting point for which a partial match was found. */
                   6447: 
                   6448:   md->start_match_ptr = start_match;
                   6449:   md->start_used_ptr = start_match;
                   6450:   md->match_call_count = 0;
                   6451:   md->match_function_type = 0;
                   6452:   md->end_offset_top = 0;
                   6453:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
                   6454:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
                   6455: 
                   6456:   switch(rc)
                   6457:     {
                   6458:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
                   6459:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
                   6460:     entirely. The only way we can do that is to re-do the match at the same
                   6461:     point, with a flag to force SKIP with an argument to be ignored. Just
                   6462:     treating this case as NOMATCH does not work because it does not check other
                   6463:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
                   6464: 
                   6465:     case MATCH_SKIP_ARG:
                   6466:     new_start_match = start_match;
                   6467:     md->ignore_skip_arg = TRUE;
                   6468:     break;
                   6469: 
                   6470:     /* SKIP passes back the next starting point explicitly, but if it is the
                   6471:     same as the match we have just done, treat it as NOMATCH. */
                   6472: 
                   6473:     case MATCH_SKIP:
                   6474:     if (md->start_match_ptr != start_match)
                   6475:       {
                   6476:       new_start_match = md->start_match_ptr;
                   6477:       break;
                   6478:       }
                   6479:     /* Fall through */
                   6480: 
                   6481:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   6482:     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
                   6483: 
                   6484:     case MATCH_NOMATCH:
                   6485:     case MATCH_PRUNE:
                   6486:     case MATCH_THEN:
                   6487:     md->ignore_skip_arg = FALSE;
                   6488:     new_start_match = start_match + 1;
                   6489: #ifdef SUPPORT_UTF8
                   6490:     if (utf8)
                   6491:       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
                   6492:         new_start_match++;
                   6493: #endif
                   6494:     break;
                   6495: 
                   6496:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6497: 
                   6498:     case MATCH_COMMIT:
                   6499:     rc = MATCH_NOMATCH;
                   6500:     goto ENDLOOP;
                   6501: 
                   6502:     /* Any other return is either a match, or some kind of error. */
                   6503: 
                   6504:     default:
                   6505:     goto ENDLOOP;
                   6506:     }
                   6507: 
                   6508:   /* Control reaches here for the various types of "no match at this point"
                   6509:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6510: 
                   6511:   rc = MATCH_NOMATCH;
                   6512: 
                   6513:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6514:   newline in the subject (though it may continue over the newline). Therefore,
                   6515:   if we have just failed to match, starting at a newline, do not continue. */
                   6516: 
                   6517:   if (firstline && IS_NEWLINE(start_match)) break;
                   6518: 
                   6519:   /* Advance to new matching position */
                   6520: 
                   6521:   start_match = new_start_match;
                   6522: 
                   6523:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6524:   the subject. */
                   6525: 
                   6526:   if (anchored || start_match > end_subject) break;
                   6527: 
                   6528:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6529:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   6530:   or ANY or ANYCRLF, advance the match position by one more character. */
                   6531: 
                   6532:   if (start_match[-1] == CHAR_CR &&
                   6533:       start_match < end_subject &&
                   6534:       *start_match == CHAR_NL &&
                   6535:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   6536:         (md->nltype == NLTYPE_ANY ||
                   6537:          md->nltype == NLTYPE_ANYCRLF ||
                   6538:          md->nllen == 2))
                   6539:     start_match++;
                   6540: 
                   6541:   md->mark = NULL;   /* Reset for start of next match attempt */
                   6542:   }                  /* End of for(;;) "bumpalong" loop */
                   6543: 
                   6544: /* ==========================================================================*/
                   6545: 
                   6546: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   6547: conditions is true:
                   6548: 
                   6549: (1) The pattern is anchored, or the match was forced to fail by (*COMMIT);
                   6550: 
                   6551: (2) We are past the end of the subject;
                   6552: 
                   6553: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   6554:     this option requests that a match occur at or before the first newline in
                   6555:     the subject.
                   6556: 
                   6557: When we have a match and the offset vector is big enough to deal with any
                   6558: backreferences, captured substring offsets will already be set up. In the case
                   6559: where we had to get some local store to hold offsets for backreference
                   6560: processing, copy those that we can. In this case there need not be overflow if
                   6561: certain parts of the pattern were not used, even though there are more
                   6562: capturing parentheses than vector slots. */
                   6563: 
                   6564: ENDLOOP:
                   6565: 
                   6566: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
                   6567:   {
                   6568:   if (using_temporary_offsets)
                   6569:     {
                   6570:     if (arg_offset_max >= 4)
                   6571:       {
                   6572:       memcpy(offsets + 2, md->offset_vector + 2,
                   6573:         (arg_offset_max - 2) * sizeof(int));
                   6574:       DPRINTF(("Copied offsets from temporary memory\n"));
                   6575:       }
                   6576:     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
                   6577:     DPRINTF(("Freeing temporary memory\n"));
                   6578:     (pcre_free)(md->offset_vector);
                   6579:     }
                   6580: 
                   6581:   /* Set the return code to the number of captured strings, or 0 if there were
                   6582:   too many to fit into the vector. */
                   6583: 
                   6584:   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
                   6585:     0 : md->end_offset_top/2;
                   6586: 
                   6587:   /* If there is space in the offset vector, set any unused pairs at the end of
                   6588:   the pattern to -1 for backwards compatibility. It is documented that this
                   6589:   happens. In earlier versions, the whole set of potential capturing offsets
                   6590:   was set to -1 each time round the loop, but this is handled differently now.
                   6591:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
                   6592:   those at the end that need unsetting here. We can't just unset them all at
                   6593:   the start of the whole thing because they may get set in one branch that is
                   6594:   not the final matching branch. */
                   6595: 
                   6596:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
                   6597:     {
                   6598:     register int *iptr, *iend;
                   6599:     int resetcount = 2 + re->top_bracket * 2;
                   6600:     if (resetcount > offsetcount) resetcount = ocount;
                   6601:     iptr = offsets + md->end_offset_top;
                   6602:     iend = offsets + resetcount;
                   6603:     while (iptr < iend) *iptr++ = -1;
                   6604:     }
                   6605: 
                   6606:   /* If there is space, set up the whole thing as substring 0. The value of
                   6607:   md->start_match_ptr might have been modified if \K was encountered on the
                   6608:   successful matching path. */
                   6609: 
                   6610:   if (offsetcount < 2) rc = 0; else
                   6611:     {
                   6612:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   6613:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
                   6614:     }
                   6615: 
                   6616:   /* Return MARK data if requested */
                   6617: 
                   6618:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
                   6619:     *(extra_data->mark) = (unsigned char *)(md->mark);
                   6620:   DPRINTF((">>>> returning %d\n", rc));
                   6621:   return rc;
                   6622:   }
                   6623: 
                   6624: /* Control gets here if there has been an error, or if the overall match
                   6625: attempt has failed at all permitted starting positions. */
                   6626: 
                   6627: if (using_temporary_offsets)
                   6628:   {
                   6629:   DPRINTF(("Freeing temporary memory\n"));
                   6630:   (pcre_free)(md->offset_vector);
                   6631:   }
                   6632: 
                   6633: /* For anything other than nomatch or partial match, just return the code. */
                   6634: 
                   6635: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
                   6636:   {
                   6637:   DPRINTF((">>>> error: returning %d\n", rc));
                   6638:   return rc;
                   6639:   }
                   6640: 
                   6641: /* Handle partial matches - disable any mark data */
                   6642: 
                   6643: if (start_partial != NULL)
                   6644:   {
                   6645:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   6646:   md->mark = NULL;
                   6647:   if (offsetcount > 1)
                   6648:     {
                   6649:     offsets[0] = (int)(start_partial - (USPTR)subject);
                   6650:     offsets[1] = (int)(end_subject - (USPTR)subject);
                   6651:     }
                   6652:   rc = PCRE_ERROR_PARTIAL;
                   6653:   }
                   6654: 
                   6655: /* This is the classic nomatch case */
                   6656: 
                   6657: else
                   6658:   {
                   6659:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   6660:   rc = PCRE_ERROR_NOMATCH;
                   6661:   }
                   6662: 
                   6663: /* Return the MARK data if it has been requested. */
                   6664: 
                   6665: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
                   6666:   *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);
                   6667: return rc;
                   6668: }
                   6669: 
                   6670: /* End of pcre_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>